Advertisement
diagnoze

node

Dec 7th, 2016
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 15.95 KB | None | 0 0
  1. /**
  2. * Created by ASUS-PC on 11/8/2016.
  3. */
  4. var FeedParser = require('feedparser'),
  5. request = require('request'),
  6. http = require('http'),
  7. https = require('https'),
  8. striptags = require('striptags'),
  9. sizeOf = require('image-size'),
  10. size = require('request-image-size'),
  11. Slack = require('slack-node'),
  12. mysql = require('mysql'),
  13. crypto = require('crypto'),
  14. Emitter = require('tiny-emitter'),
  15. errors = []
  16. ;
  17.  
  18. var emitter = new Emitter();
  19.  
  20. //this is for slack
  21. //webhookUri = "https://hooks.slack.com/services/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";
  22. webhookUri = "";
  23.  
  24. slack = new Slack();
  25. slack.setWebhook(webhookUri);
  26. // end slack
  27.  
  28. // setting mysql config
  29. var connection = mysql.createConnection({
  30. host: "localhost",
  31. user: "root",
  32. password: '',
  33. //user: "intrafeed",
  34. //password: 'Palmerah$$$i9i7',
  35. database: "intrafeed",
  36. charset: 'utf8mb4'
  37. //waitForConnections : true
  38. // multipleStatements: true
  39. });
  40.  
  41. connection.connect(function (err) {
  42. if (err) {
  43. console.log('Error connecting to Db');
  44. return;
  45. }
  46. console.log('Connection established');
  47. });
  48.  
  49. var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";
  50.  
  51. /*
  52. End settings and params
  53. */
  54.  
  55. //fetch('http://rss.detik.com/index.php/detikcom_nasional', 63, '416a5423119a4ec081ca4dd4d948f27f');
  56.  
  57. start();
  58. //5 minutes
  59. setInterval(function () {
  60. start();
  61. }, 5 * 60 * 1000);
  62.  
  63.  
  64. function start() {
  65. var channels = null;
  66.  
  67. request({url: intrafeedLinks, json: true}, function (error, response, body) {
  68. if (!error && response.statusCode == 200) {
  69. console.log(body); // Show the HTML for the Google homepage.
  70. channels = body.channels;
  71. for (i = 0; i < channels.length; i++) {
  72.  
  73. //fetch link
  74. //fetch(channels[i].link, channels[i].channelid, channels[i].token);
  75. emitter.emit('fetch', channels[i].link, channels[i].channelid, channels[i].token);
  76. }
  77. }
  78. });
  79. }
  80.  
  81. emitter.on("fetch", function (link, channelId, channelToken) {
  82. fetch(link, channelId, channelToken);
  83. });
  84.  
  85. function fetch(link, channelId, channelToken) {
  86. var req = request(link);
  87. //var feedparser = new FeedParser();
  88. var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
  89.  
  90. req.on('error', function (error) {
  91. // handle any request errors
  92. });
  93.  
  94. var itemArray = [];
  95. var countItemArray = 0;
  96.  
  97. req.on('response', function (res) {
  98. var stream = this;
  99.  
  100. if (res.statusCode != 200) {
  101. var txt = "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId + " error result: " + err;
  102. if (isNaN(errors[channelId])) {
  103. errors[channelId] = 1;
  104. } else {
  105. errors[channelId]++;
  106. }
  107. if (errors[channelId] >= 10) {
  108. slack.webhook({
  109. text: txt
  110. }, function (errs, response) {
  111. // console.log(response);
  112. });
  113. errors[channelId] = 0;
  114. }
  115. console.log("errors counter : " + channelId + " " + errors[channelId]);
  116.  
  117. return this.emit('error', new Error('Bad status code'));
  118. }
  119.  
  120. stream.pipe(feedparser);
  121. });
  122.  
  123. feedparser.on('error', function (err) {
  124. var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
  125. console.log(txt);
  126. if (isNaN(errors[channelId])) {
  127. errors[channelId] = 1;
  128. } else {
  129. errors[channelId]++;
  130. }
  131.  
  132. if (errors[channelId] >= 10) {
  133. slack.webhook({
  134. text: txt
  135. }, function (errs, response) {
  136. // console.log(response);
  137. });
  138. errors[channelId] = 0;
  139. }
  140. console.log("errors counter : " + channelId + " " + errors[channelId]);
  141. });
  142.  
  143. feedparser.on('readable', function () {
  144. // This is where the action is!
  145. var stream = this
  146. , meta = this.meta // **NOTE** the "meta" is always available in the context of the feedparser instance
  147. , item;
  148.  
  149. while (item = stream.read()) {
  150. ///store(item, link, channelId, channelToken);
  151.  
  152. if (item.date == null) {
  153. item.date = new Date().toString();
  154. // console.log("date is :"+item.date);
  155. // return;
  156. }
  157.  
  158. if (item.pubDate == null) {
  159. item.pubDate = new Date().toString();
  160. // console.log("date is :"+item.date);
  161. //return;
  162. }
  163.  
  164. item.date = new Date(item.date).getTime();
  165.  
  166. //console.log("date is :"+item.date);
  167.  
  168. itemArray.push(item);
  169. //console.log(" -------++----------"+ item +"==============");
  170. //emitter.emit('store', item, link, channelId, channelToken);
  171. countItemArray++;
  172. }
  173. });
  174. // promise start
  175.  
  176. feedparser.on("end", function () {
  177. console.log(" ++++++++" + link + " total : " + countItemArray + "+++++++++");
  178.  
  179. var byDate = itemArray.slice(0);
  180. byDate.sort(function (a, b) {
  181. var x = a.date;
  182. var y = b.date;
  183. return x < y ? -1 : x > y ? 1 : 0;
  184. });
  185.  
  186. //fechingAll
  187. fetchingAll(byDate , link, channelId, channelToken);
  188.  
  189. });
  190. }
  191. function fetchingAll(item, link, channelId, channelToken){
  192. var fetchpromises = item;
  193. var sequence = Promise.resolve()
  194.  
  195. fetchpromises.forEach(function(index,value){
  196. sequence = sequence.then(function(){
  197. return store(index, link, channelId, channelToken)
  198. //console.log(index)
  199. }).then(function(value){
  200.  
  201.  
  202. var size = require('request-image-size');
  203. size(value.data.imagelink, function(err, dimensions, length) {
  204. if(!err){
  205. value.data.imagewidth = dimensions.width ;
  206. value.data.imageheight = dimensions.height;
  207. //value.data.createdDate = createdDate;
  208.  
  209. console.log("insert image size, width:" + value.data.imagewidth + " | height: "+ value.data.imageheight);
  210.  
  211. return value;
  212. }
  213. });
  214.  
  215. return mysqlSave(value.link, value.channelId, value.channelToken, value.data)
  216.  
  217. }).
  218.  
  219. catch(function(err){
  220. console.log(err + ' failed to load!')
  221. })
  222. })
  223. }
  224.  
  225.  
  226. // promise end
  227. emitter.on('store', function (item, link, channelId, channelToken) {
  228. store(item, link, channelId, channelToken);
  229. });
  230.  
  231. function store(item, link, channelId, channelToken) {
  232.  
  233. var linkOri = link;
  234. var linkItem = item.link;
  235. var title = item.title;
  236. var description = item.description;
  237. var pubDate = item.date;
  238. /* var newDate = new Date(pubDate);
  239. var tz = newDate.toString().match(/([-\+][0-9]+)\s/)[1];
  240. var t = newDate.toJSON();
  241. t = t.substring(0, 19) + tz;
  242. t = t.replace("+", "-");*/
  243.  
  244. //console.log("pubDate is :" + pubDate + "link at" + link);
  245. /* if (pubDate == null) {
  246. pubDate = new Date().toString();
  247. }*/
  248. var createdDate = new Date(pubDate).toISOString().slice(0, 19).replace('T', ' ');
  249. //console.log(createdDate);
  250. var author = item.author || "";
  251. var imagewidth = 0;
  252. var imageheight = 0;
  253.  
  254. var isItemImage = item.image;
  255.  
  256. var itemImage = "";
  257. var imagelink = "a";
  258.  
  259. var linkhash = crypto.createHash('sha256').update(linkItem).digest('sha256').toString('hex');
  260.  
  261. if (isItemImage.hasOwnProperty("url") && isItemImage.url != null) {
  262. itemImage = isItemImage.url;
  263. }
  264. if (itemImage.length > 0) {
  265. imagelink = itemImage;
  266. }
  267.  
  268. if (imagelink.length < 5) {
  269. imagelink = getImage(description);
  270. }
  271.  
  272. if (imagelink.length < 5) {
  273. if (item.hasOwnProperty("link") && item.link != null) {
  274. imagelink = item.link;
  275. }
  276. }
  277.  
  278.  
  279. //get from enclosures
  280. if (imagelink.length < 5) {
  281. if (item.hasOwnProperty("enclosures") && item.enclosures != null) {
  282. var itemEnclosures = item.enclosures;
  283. if (itemEnclosures.length > 0) {
  284. if (itemEnclosures[0].hasOwnProperty("url")) {
  285. imagelink = itemEnclosures[0].url;
  286. }
  287. }
  288. }
  289. }
  290.  
  291. //get from tag rss:image
  292. if (imagelink.length < 5) {
  293. if (item.hasOwnProperty("rss:image") && item["rss:image"] != null) {
  294. var imageRss = item["rss:image"];
  295. if (imageRss.hasOwnProperty("url")) {
  296. var imageRssUrl = imageRss["url"];
  297. if (imageRssUrl.hasOwnProperty("#")) {
  298. var imageRssUrlCrash = imageRssUrl["#"];
  299. if (imageRssUrlCrash.length > 1) {
  300. imagelink = imageRss
  301. } else {
  302. var meta = item.meta;
  303. if (meta.hasOwnProperty("image") && meta.image != null) {
  304. var metaImage = meta.image;
  305. if (metaImage.hasOwnProperty("url")) {
  306. imagelink = meta["image"]["url"];
  307. }
  308. }
  309. }
  310. }
  311. }
  312.  
  313. }
  314. }
  315.  
  316. //item meta image url
  317. if (imagelink.length < 5) {
  318. var meta = item.meta;
  319. if (meta != null) {
  320. if (meta.hasOwnProperty("image")) {
  321. var metaImage = meta.image;
  322. if (metaImage.hasOwnProperty("url")) {
  323. imagelink = meta["image"]["url"];
  324. }
  325. }
  326. }
  327. }
  328.  
  329. //get from tag media:content
  330. if (imagelink < 5) {
  331. if (item.hasOwnProperty("media:content") && item["media:content"] != null) {
  332. var mediaContent = item["media:content"];
  333. imagelink = mediaContent.getAttribute("url");
  334. }
  335. }
  336.  
  337. if (imagelink < 5) {
  338. if (item.hasOwnProperty("content:encoded")) {
  339. var contentEncoded = item["content:encoded"];
  340. if (contentEncoded.hasOwnProperty("#")) {
  341. var contentCrash = contentEncoded["#"];
  342. imagelink = getImage(contentCrash);
  343. }
  344. }
  345. }
  346.  
  347. var data = {
  348. title: title,
  349. description: striptags(description),
  350. link: linkItem,
  351. imagelink: imagelink,
  352. imagewidth: imagewidth || 0,
  353. imageheight: imageheight || 0,
  354. createdDate: createdDate,
  355. channelid: channelId,
  356. username: author,
  357. linkhash: linkhash,
  358. active: 1
  359. };
  360.  
  361. if (imagelink.length > 5) {
  362. imagelink = imagelink.replace("<![CDATA[", "").replace("]]>", "");
  363. imagelink = imagelink.replace("'", "").replace('"', "");
  364. imagelink = imagelink.replace("'", "");
  365. imagelink = imagelink.replace('"', "");
  366. imagelink = imagelink.replace("&quot;", "");
  367. imagelink = striptags(imagelink); /// start
  368.  
  369. if (imagelink.match(/.js/)) { ///
  370. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  371. //mysqlSave(link, channelId, channelToken, data);
  372. } else {
  373. size(imagelink, function (err, dimensions, length) {
  374. if (err) {
  375. slack.webhook({
  376. text: err
  377. }, function (errs, response) {
  378. // console.log(response);
  379. });
  380. return true;
  381. }
  382. if (!err) {
  383. if (dimensions != undefined) {
  384. imagewidth = dimensions.width;
  385. imageheight = dimensions.height;
  386. }
  387.  
  388. data = {
  389. title: title,
  390. description: striptags(description),
  391. link: linkItem,
  392. imagelink: imagelink,
  393. imagewidth: imagewidth || 0,
  394. imageheight: imageheight || 0,
  395. createdDate: createdDate,
  396. channelid: channelId,
  397. username: author,
  398. linkhash: linkhash,
  399. active: 1
  400. };
  401.  
  402. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  403. //mysqlSave(link, channelId, channelToken, data);
  404. }
  405.  
  406.  
  407. });
  408. }
  409.  
  410.  
  411. } else {
  412. //emitter.emit('mysql-save', link, channelId, channelToken, data);
  413. //mysqlSave(link, channelId, channelToken, data);
  414.  
  415. }
  416.  
  417.  
  418. var fetch = {link:link,channelId:channelId,channelToken:channelToken,data:data};
  419. return fetch;
  420. }
  421.  
  422.  
  423. emitter.on('mysql-save', function (link, channelId, channelToken, data) {
  424. // var sql2 = '⁠⁠⁠insert into feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username) SELECT * FROM (SELECT ? as f1, ? as f2, ? as f3, ? as f4, 125 as f5,125 as f6,84 as f7,1 as f8, ? as f9, ? as f10) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? and linkhash= ?) LIMIT 1;';
  425. mysqlSave(link, channelId, channelToken, data);
  426. });
  427.  
  428. function mysqlSave(link, channelId, channelToken, data) {
  429. var id = 0;
  430. var results;
  431. var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';
  432.  
  433. var arrayValue = [data.title, data.description, data.link, data.imagelink, data.imagewidth, data.imageheight, channelId, 1, data.linkhash, data.username, data.createdDate, channelId, data.linkhash];
  434. var query = connection.query(sql, arrayValue, function (errs, resulr) {
  435. var txt = data.title + " " + data.link + " " + data.description;
  436. if (errs) {
  437. console.log("error dibagian:"+ errs);
  438.  
  439. slack.webhook({
  440. text: txt + errs.toString()
  441. }, function (errs, response) {
  442. // console.log(response);
  443. });
  444. }
  445.  
  446. var imagelink = data.imagelink;
  447. imagelink.replace("&quot;","");
  448. size(imagelink, function (err, dimensions) {
  449. //console.log(dimensions.width, dimensions.height);
  450. if (!err) {
  451. // connection.query('UPDATE feed SET imagewidth = ?, imageheight = ? Where id = ?',[dimensions.width,dimensions.height, resulr.insertId]);
  452. }
  453. });
  454. });
  455.  
  456. }
  457.  
  458.  
  459. function getImage(string) {
  460. if (string != null) {
  461. string = string.replace("<![CDATA[", "").replace("]]>", "");
  462. string = string.replace("CDATA", "");
  463. string = string.replace("<figure>", "");
  464. string = string.replace("</figure>", "");
  465. string = string.replace("\n", "");
  466. string = string.replace("\t", "");
  467.  
  468. var re = /<img[^>]+src="?([^"\s]+)"?[^>]*\/>/g;
  469. var rex = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i; // match src='a' OR src="a" OR src=a
  470. var results = null;
  471. var img = "";
  472.  
  473. results = re.exec(string);
  474. if (results) {
  475. img = results[1];
  476. if (img.match(/.js/)) {
  477. var res = string.match(rex);
  478. if (res != null) {
  479. img = res[1] || res[2] || res[3]; // get the one that matched
  480. }
  481. }
  482.  
  483. } else {
  484. var res = string.match(rex);
  485. if (res != null) {
  486. img = res[1] || res[2] || res[3]; // get the one that matched
  487. }
  488. }
  489.  
  490. return img;
  491. } else {
  492. return "";
  493. }
  494.  
  495. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement