Advertisement
diagnoze

channel64

Dec 29th, 2016
123
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.09 KB | None | 0 0
  1. var FeedParser = require('feedparser'),
  2. request = require('request'),
  3. http = require('http'),
  4. https = require('https'),
  5. striptags = require('striptags'),
  6. sizeOf = require('image-size'),
  7. size = require('request-image-size'),
  8. Slack = require('slack-node'),
  9. mysql = require('mysql'),
  10. crypto = require('crypto'),
  11. Emitter = require('tiny-emitter'),
  12. SqlString = require('sqlstring'),
  13. errors = []
  14. ;
  15.  
  // Event bus used to hand channels from start() over to the fetch pipeline.
  var emitter = new Emitter();

  // Slack incoming-webhook used for error notifications.
  // The webhook URL is a secret, so it is read from the environment instead of
  // being kept (even commented out) in the source. The default stays the empty
  // string that was hard-coded before.
  var webhookUri = process.env.SLACK_WEBHOOK_URI || "";

  var slack = new Slack();
  slack.setWebhook(webhookUri);

  // MySQL connection settings. Every value can be overridden through the
  // environment; the defaults are the values that were hard-coded before.
  // Credentials must never be committed, not even inside comments.
  var connection = mysql.createConnection({
    host: process.env.DB_HOST || "localhost",
    user: process.env.DB_USER || "root",
    password: process.env.DB_PASSWORD || '',
    database: process.env.DB_NAME || "intrafeed",
    charset: 'utf8mb4'
  });

  connection.connect(function (err) {
    if (err) {
      // Include the cause so a failed start-up can actually be diagnosed.
      console.log('Error connecting to Db', err);
      return;
    }
    console.log('Connection established');
  });

  // Endpoint that returns the list of RSS channels to poll.
  var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";

  // Poll once immediately, then again every 5 minutes.
  start();
  setInterval(start, 5 * 60 * 1000);
  60.  
  61.  
  /**
   * Downloads the channel list from `intrafeedLinks` and emits a `fetch` event
   * (link, channelid, token) for every channel that should be processed.
   *
   * Only the channel whose id equals 64 is processed; all others are skipped.
   * Failures (request error, non-200 status, payload without a `channels`
   * array) are logged and end the run without throwing, so the periodic timer
   * keeps going.
   */
  function start() {
    request({url: intrafeedLinks, json: true}, function (error, response, body) {
      if (error) {
        console.log("error loading channel list: " + error);
        return;
      }
      if (!response || response.statusCode != 200) {
        console.log("unexpected status loading channel list: " + (response && response.statusCode));
        return;
      }

      var channels = body && body.channels;
      if (!Array.isArray(channels)) {
        console.log("channel list response has no channels array");
        return;
      }

      channels.forEach(function (channel) {
        // Loose comparison on purpose: the id may arrive as a number or a string.
        if (channel.channelid == 64) {
          console.log("link channel: " + channel.link);
          emitter.emit('fetch', channel.link, channel.channelid, channel.token);
        }
      });
    });
  }
  80.  
  // Bridge between the scheduler and the downloader: each "fetch" event
  // emitted by start() triggers a download of that channel's feed.
  emitter.on("fetch", (link, channelId, channelToken) => {
    fetch(link, channelId, channelToken);
  });
  84.  
  /**
   * Adds one failure to the per-channel counter kept in `errors`. Once ten
   * failures have accumulated, `message` is sent to Slack and the counter is
   * reset to 0.
   *
   * @param {number|string} channelId - channel the failure belongs to
   * @param {string} message - text sent to Slack when the threshold is reached
   */
  function reportChannelError(channelId, message) {
    errors[channelId] = (errors[channelId] || 0) + 1;

    if (errors[channelId] >= 10) {
      slack.webhook({text: message}, function () {
        // Best-effort notification: the delivery result is not inspected.
      });
      errors[channelId] = 0;
    }
  }

  /**
   * Downloads one RSS/Atom feed, parses it and hands all of its items, sorted
   * by date (oldest first), to fetchingAll().
   *
   * Items without a date get the current time. Items whose date cannot be
   * parsed also get the current time so later date formatting cannot throw.
   *
   * @param {string} link - feed URL
   * @param {number|string} channelId - id of the channel the feed belongs to
   * @param {string} channelToken - token of the channel, passed through unchanged
   */
  function fetch(link, channelId, channelToken) {
    var req = request(link);
    var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
    var items = [];

    req.on('error', function (error) {
      // Log instead of silently dropping the failure; this also receives the
      // 'Bad status code' error emitted below.
      console.log("Error requesting " + link + " channelid :" + channelId + " error result: " + error);
    });

    req.on('response', function (res) {
      if (res.statusCode != 200) {
        // The message is built from values that exist in this scope only
        // (there is no error object here, just the status code).
        var txt = "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId;
        reportChannelError(channelId, txt);
        console.log("errors counter : " + channelId + " " + errors[channelId]);

        return req.emit('error', new Error('Bad status code'));
      }

      req.pipe(feedparser);
    });

    feedparser.on('error', function (err) {
      var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
      reportChannelError(channelId, txt);
    });

    feedparser.on('readable', function () {
      var item;

      while ((item = feedparser.read())) {
        if (item.date == null) {
          item.date = new Date().toString();
        }
        if (item.pubDate == null) {
          item.pubDate = new Date().toString();
        }

        // Normalise to epoch milliseconds; fall back to "now" when the feed
        // supplied something Date cannot parse.
        var timestamp = new Date(item.date).getTime();
        item.date = isNaN(timestamp) ? Date.now() : timestamp;

        items.push(item);
      }
    });

    feedparser.on("end", function () {
      console.log(" ++++++++" + link + " total : " + items.length + "+++++++++");

      // Sort a copy so items are processed oldest first.
      var byDate = items.slice(0).sort(function (a, b) {
        return a.date - b.date;
      });

      fetchingAll(byDate, link, channelId, channelToken);
    });
  }
  191.  
  192. /*
  193. promises begin
  194. */
  195.  
  /**
   * Promise wrapper around store(): resolves with the original arguments plus
   * store()'s return value under `data`. Note that store() itself returns an
   * object with a `data` property, so the stored row ends up at `.data.data`.
   * If store() throws, the returned promise rejects with that error.
   */
  var promiseGetData = function (item, link, channelId, channelToken) {
    return new Promise(function (resolve) {
      var stored = store(item, link, channelId, channelToken);

      resolve({
        item: item,
        link: link,
        channelId: channelId,
        channelToken: channelToken,
        data: stored
      });
    });
  };
  204.  
  /**
   * Looks up the pixel size of the item's image and records it on the row.
   *
   * `captureData.data.data` is the row built by store(). When its `imagelink`
   * is a string longer than 5 characters the image size is requested and
   * `imagewidth` / `imageheight` are filled in. An image that is exactly
   * 28 pixels wide has its `imagelink` cleared (presumably a placeholder or
   * tracking image; confirm with the feed owner).
   *
   * @param {object} captureData - value resolved by promiseGetData()
   * @returns {Promise<object>} resolves with the same `captureData` object;
   *   rejects with `captureData` when the size lookup fails
   */
  var promiseGetImage = function (captureData) {
    return new Promise(function (resolve, reject) {
      var row = captureData.data.data;
      var imagelink = row.imagelink;

      if (typeof imagelink !== 'string' || imagelink.length <= 5) {
        // Nothing to measure; the item is still passed on to be saved.
        console.log("no usable image link, skipping image size lookup");
        resolve(captureData);
        return;
      }

      size(imagelink, function (err, dimensions) {
        if (err || !dimensions) {
          console.log("error data " + err);
          reject(captureData);
          return;
        }

        if (dimensions.width == 28) {
          row.imagelink = '';
        }

        row.imagewidth = dimensions.width;
        row.imageheight = dimensions.height;

        // Always settle: leaving this promise pending would stall every item
        // queued behind it in fetchingAll().
        resolve(captureData);
      });
    });
  };
  239. //link, channelId, channelToken, data
  /**
   * Final pipeline step: logs the call and forwards the row found at
   * `results.data.data` to mysqlSave(). Returns nothing; the insert is
   * fire-and-forget.
   */
  var promiseSaveData = function (results) {
    console.log("promiseSaveData" + results);

    var row = results.data.data;
    mysqlSave(results.link, results.channelId, results.channelToken, row);
  };
  244.  
  245. /*
  246. promises end
  247. */
  /**
   * Processes feed items strictly one after another: build the row
   * (promiseGetData), measure its image (promiseGetImage), then save it
   * (promiseSaveData).
   *
   * A failure in any step is logged and skips only that item; the remaining
   * items are still processed.
   *
   * @param {Array<object>} items - feed items, in the order they should be saved
   * @param {string} link - feed URL the items came from
   * @param {number|string} channelId - id of the channel
   * @param {string} channelToken - token of the channel
   * @returns {Promise<void>} settles after the last item has been handled
   *   (previously nothing was returned, so existing callers are unaffected)
   */
  function fetchingAll(items, link, channelId, channelToken) {
    return items.reduce(function (chain, item) {
      return chain
        .then(function () {
          return promiseGetData(item, link, channelId, channelToken);
        })
        .then(function (datas) {
          return promiseGetImage(datas);
        })
        .then(function (results) {
          console.log(results);
          promiseSaveData(results);
        })
        .catch(function (err) {
          // promiseGetImage rejects with the captured data rather than an
          // Error, so only print the value itself when it is a real Error.
          var reason = err instanceof Error ? String(err) : "image size lookup failed";
          console.log("skipping feed item " + (item && item.link) + " of " + link + ": " + reason);
        });
    }, Promise.resolve());
  }
  270.  
  271. // promise end
  // Event entry point for store(): builds the row for one item and discards
  // the result. Nothing in this file emits 'store'.
  emitter.on('store', (item, link, channelId, channelToken) => {
    store(item, link, channelId, channelToken);
  });
  275.  
  /**
   * Picks an image URL for a feed item. Sources are tried in a fixed order and
   * the first one that yields a string of at least 5 characters wins:
   * item.image.url, first <img> in the description, item.link,
   * first enclosure, rss:image, item.meta.image.url, media:content,
   * first <img> in content:encoded.
   *
   * NOTE(review): item.link is tried third, exactly as before, so the sources
   * after it are only reached for items without a link; confirm whether
   * it should be the last resort instead.
   *
   * @returns {string} the image URL, or "" when no source has one
   */
  function findImageLink(item, description) {
    var sources = [
      function () {
        return item.image && item.image.url;
      },
      function () {
        return getImage(description);
      },
      function () {
        return item.link;
      },
      function () {
        var enclosures = item.enclosures;
        return enclosures && enclosures.length > 0 ? enclosures[0].url : null;
      },
      function () {
        // The URL text sits under "#"; use it rather than the wrapper object.
        var rssImage = item["rss:image"];
        return rssImage && rssImage.url ? rssImage.url["#"] : null;
      },
      function () {
        return item.meta && item.meta.image ? item.meta.image.url : null;
      },
      function () {
        // Parsed elements are plain objects (no DOM methods); attributes are
        // read from the "@" key. Repeated elements may arrive as an array.
        var media = item["media:content"];
        if (Array.isArray(media)) {
          media = media[0];
        }
        return media && media["@"] ? media["@"].url : null;
      },
      function () {
        var encoded = item["content:encoded"];
        return encoded ? getImage(encoded["#"]) : null;
      }
    ];

    for (var s = 0; s < sources.length; s++) {
      var candidate = sources[s]();
      if (typeof candidate === 'string' && candidate.length >= 5) {
        return candidate;
      }
    }
    return "";
  }

  /**
   * Removes CDATA wrappers, every quote character / &quot; entity and any
   * markup from an image URL.
   */
  function cleanImageLink(imagelink) {
    var cleaned = imagelink
      .replace(/<!\[CDATA\[|\]\]>/g, "")
      .replace(/'|"|&quot;/g, "");
    return striptags(cleaned);
  }

  /**
   * Converts one parsed feed item into the row that is later inserted into
   * the `feed` table.
   *
   * @param {object} item - feed item as produced by the feed parser
   * @param {string} link - URL of the feed the item came from
   * @param {number|string} channelId - id of the channel
   * @param {string} channelToken - token of the channel
   * @returns {{link: string, channelId: *, channelToken: string, data: object}}
   *   the inputs plus `data`, the row (title, description, link, imagelink,
   *   imagewidth, imageheight, createdDate, channelid, username, linkhash,
   *   notes, tags, active). imagewidth/imageheight are always 0 here; they are
   *   filled in later by promiseGetImage().
   */
  function store(item, link, channelId, channelToken) {
    var description = item.description;
    var categories = item.categories || ['tgs'];

    // A missing or unparsable date falls back to "now" instead of throwing
    // from toISOString().
    var published = item.date == null ? new Date() : new Date(item.date);
    if (isNaN(published.getTime())) {
      published = new Date();
    }
    // MySQL DATETIME text in UTC: "YYYY-MM-DD HH:MM:SS".
    var createdDate = published.toISOString().slice(0, 19).replace('T', ' ');

    // De-duplication key: SHA-256 of the guid (or of the link when the item
    // has no guid), hex encoded.
    var guid = String(item.guid || item.link || "");
    var linkhash = crypto.createHash('sha256').update(guid).digest('hex');

    var imagelink = findImageLink(item, description);
    if (imagelink.length > 5) {
      imagelink = cleanImageLink(imagelink);
    }

    var data = {
      title: striptags(item.title),
      description: striptags(description),
      link: item.link,
      imagelink: imagelink,
      imagewidth: 0,
      imageheight: 0,
      createdDate: createdDate,
      channelid: channelId,
      username: item.author || "",
      linkhash: linkhash,
      notes: "nts",
      tags: categories.toString(),
      active: 1
    };

    return {link: link, channelId: channelId, channelToken: channelToken, data: data};
  }
  434.  
  435.  
  // Event entry point for mysqlSave(): forwards the event arguments unchanged.
  // Nothing in this file emits 'mysql-save'.
  emitter.on('mysql-save', (link, channelId, channelToken, data) => {
    mysqlSave(link, channelId, channelToken, data);
  });
  440.  
  /**
   * Inserts one row into the `feed` table unless a row with the same
   * channelid and linkhash already exists (INSERT ... WHERE NOT EXISTS).
   * All values are passed as query placeholders.
   *
   * On a query error the error is logged and reported to Slack; nothing is
   * thrown and nothing is returned.
   *
   * @param {string} link - feed URL (not stored; kept for interface compatibility)
   * @param {number|string} channelId - id of the channel the row belongs to
   * @param {string} channelToken - token of the channel (not stored)
   * @param {object} data - row built by store()
   */
  function mysqlSave(link, channelId, channelToken, data) {
    var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate,notes,tags) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11, ? as f12, ? as f13) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';

    // Store the image URL that was resolved for the item (an empty string was
    // always written before, discarding it). Anything that is not a string is
    // stored as "".
    var imagelink = typeof data.imagelink === 'string' ? data.imagelink : "";

    var values = [
      data.title, data.description, data.link, imagelink,
      data.imagewidth, data.imageheight, channelId, 1,
      data.linkhash, data.username, data.createdDate, data.notes, data.tags,
      // the last two feed the NOT EXISTS duplicate check
      channelId, data.linkhash
    ];

    connection.query(sql, values, function (queryError) {
      if (!queryError) {
        return;
      }

      console.log("error save sql:" + queryError);

      var txt = data.title + " " + data.link + " " + data.description;
      slack.webhook({text: txt + queryError.toString()}, function () {
        // Best-effort notification: the delivery result is not inspected.
      });
    });
  }
  465.  
  466.  
  /**
   * Extracts the URL of the first image referenced in an HTML fragment.
   *
   * A self-closing <img ... src=... /> tag is tried first. When there is none,
   * or when its src contains ".js", the first ` src=` attribute found anywhere
   * in the text is used instead (single-quoted, double-quoted or unquoted).
   *
   * @param {*} string - HTML fragment; anything that is not a string yields ""
   * @returns {string} the image URL, or "" when none is found
   */
  function getImage(string) {
    if (typeof string !== 'string') {
      return "";
    }

    var html = string
      .replace(/<!\[CDATA\[|\]\]>/g, "")
      .replace(/CDATA/g, "")
      .replace(/<\/?figure>/g, "")
      // Turn line breaks and tabs into a space rather than deleting them, so
      // "<img\nsrc=..." does not collapse into "<imgsrc=...".
      .replace(/[\n\t]+/g, " ");

    var selfClosingImg = /<img[^>]+src="?([^"\s]+)"?[^>]*\/>/;
    var anySrc = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i; // src='a' OR src="a" OR src=a

    var strict = selfClosingImg.exec(html);
    var img = strict ? strict[1] : "";

    if (!strict || /\.js/.test(img)) {
      var loose = html.match(anySrc);
      if (loose !== null) {
        // Exactly one group matched; an empty src must still yield a string.
        img = loose[1] || loose[2] || loose[3] || "";
      }
    }

    return img;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement