Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
var FeedParser = require('feedparser'),
    request = require('request'),
    http = require('http'),
    https = require('https'),
    striptags = require('striptags'),
    sizeOf = require('image-size'),
    size = require('request-image-size'),
    Slack = require('slack-node'),
    mysql = require('mysql'),
    crypto = require('crypto'),
    Emitter = require('tiny-emitter'),
    // Per-channel failure counters, indexed by channel id.
    errors = [];

// In-process event bus used to dispatch "fetch", "store" and "mysql-save" work.
var emitter = new Emitter();

// Slack notifications.
// The webhook URL is a credential, so it is read from the environment rather
// than written into the source. It falls back to "" when the variable is unset.
var webhookUri = process.env.SLACK_WEBHOOK_URI || "";
var slack = new Slack();
slack.setWebhook(webhookUri);

// MySQL connection.
// User and password can be overridden from the environment; the defaults are
// the local development values.
var connection = mysql.createConnection({
    host: "localhost",
    user: process.env.INTRAFEED_DB_USER || "root",
    password: process.env.INTRAFEED_DB_PASSWORD || '',
    database: "intrafeed",
    charset: 'utf8mb4'
});
connection.connect(function (err) {
    if (err) {
        // Include the cause so a failed start-up can be diagnosed from the log.
        console.log('Error connecting to Db', err);
        return;
    }
    console.log('Connection established');
});

// Endpoint that returns the list of RSS channels to poll.
var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";

/*
 End settings and params
 */

// Poll once at start-up, then every 5 minutes.
start();
setInterval(function () {
    start();
}, 5 * 60 * 1000);
/**
 * Download the channel list from `intrafeedLinks` and emit one "fetch" event
 * per channel with its link, channel id and token.
 *
 * Failures (transport error, non-200 status, response without a `channels`
 * array) are logged and the poll is skipped; nothing is thrown.
 */
function start() {
    request({url: intrafeedLinks, json: true}, function (error, response, body) {
        if (error) {
            console.log("Error requesting channel list: " + error);
            return;
        }
        if (response.statusCode !== 200) {
            console.log("Unexpected status code " + response.statusCode + " from channel list");
            return;
        }
        console.log(body); // Parsed JSON reply from the channel-list endpoint.
        var channels = body && body.channels;
        if (!Array.isArray(channels)) {
            console.log("Channel list response has no channels array");
            return;
        }
        channels.forEach(function (channel) {
            emitter.emit('fetch', channel.link, channel.channelid, channel.token);
        });
    });
}
// Route every "fetch" event to the feed downloader.
emitter.on("fetch", function onFetch(feedLink, feedChannelId, feedChannelToken) {
    fetch(feedLink, feedChannelId, feedChannelToken);
});
/**
 * Count one failure for a channel. On every 10th failure the message is
 * posted to Slack and the counter is reset to 0.
 *
 * @param {number|string} channelId - index into the shared `errors` counters
 * @param {string} txt - message to post when the threshold is reached
 */
function noteChannelError(channelId, txt) {
    if (isNaN(errors[channelId])) {
        errors[channelId] = 1;
    } else {
        errors[channelId]++;
    }
    if (errors[channelId] >= 10) {
        slack.webhook({
            text: txt
        }, function (errs, response) {
            if (errs) {
                console.log("Slack webhook failed: " + errs);
            }
        });
        errors[channelId] = 0;
    }
    console.log("errors counter : " + channelId + " " + errors[channelId]);
}

/**
 * Download one RSS feed, parse it, and hand all items (sorted by ascending
 * date) to fetchingAll once the parser has finished.
 *
 * @param {string} link - feed URL
 * @param {number|string} channelId - channel the feed belongs to
 * @param {string} channelToken - passed through to fetchingAll unchanged
 */
function fetch(link, channelId, channelToken) {
    var req = request(link);
    var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
    var itemArray = [];
    var countItemArray = 0;

    req.on('error', function (error) {
        // Also receives the 'Bad status code' error emitted below.
        console.log("Error requesting feed " + link + " channelid :" + channelId + " error result: " + error);
    });

    req.on('response', function (res) {
        if (res.statusCode !== 200) {
            var txt = "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId;
            noteChannelError(channelId, txt);
            return req.emit('error', new Error('Bad status code'));
        }
        req.pipe(feedparser);
    });

    feedparser.on('error', function (err) {
        var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
        console.log(txt);
        noteChannelError(channelId, txt);
    });

    feedparser.on('readable', function () {
        var item;
        while ((item = feedparser.read())) {
            if (item.pubDate == null) {
                item.pubDate = new Date().toString();
            }
            // Normalise the date to epoch milliseconds. A missing or
            // unparseable date falls back to "now" so later sorting and
            // formatting never see NaN.
            var timestamp = item.date == null ? NaN : new Date(item.date).getTime();
            item.date = isNaN(timestamp) ? Date.now() : timestamp;
            itemArray.push(item);
            countItemArray++;
        }
    });

    feedparser.on("end", function () {
        console.log(" ++++++++" + link + " total : " + countItemArray + "+++++++++");
        // Sort a copy so the oldest item is processed first.
        var byDate = itemArray.slice(0);
        byDate.sort(function (a, b) {
            return a.date < b.date ? -1 : a.date > b.date ? 1 : 0;
        });
        fetchingAll(byDate, link, channelId, channelToken);
    });
}
/*
 Promise helpers begin
 */
/**
 * Build the storable record for one feed item and bundle it with its context.
 *
 * Resolves with { item, link, channelId, channelToken, data } where `data`
 * is whatever store() returned. If store() throws, the promise rejects.
 */
var promiseGetData = function (item, link, channelId, channelToken) {
    return new Promise(function (fulfil) {
        fulfil({
            item: item,
            link: link,
            channelId: channelId,
            channelToken: channelToken,
            data: store(item, link, channelId, channelToken)
        });
    });
};
/**
 * Look up the pixel dimensions of the record's image and write them into
 * captureData.data.data.imagewidth / imageheight.
 *
 * Resolves with the same captureData object. Rejects with captureData when
 * the image link is not longer than 5 characters or the size lookup fails.
 */
var promiseGetImage = function (captureData) {
    return new Promise(function (fulfil, fail) {
        var record = captureData.data.data;
        if (!(record.imagelink.length > 5)) {
            console.log("error reject");
            fail(captureData);
            return;
        }
        size(record.imagelink, function (sizeErr, dims, length) {
            if (sizeErr) {
                console.log("error"+sizeErr);
                fail(captureData);
                return;
            }
            record.imagewidth = dims.width;
            record.imageheight = dims.height;
            fulfil(captureData);
        });
    });
};
/**
 * Final pipeline step: log, then pass the bundle's link, channel id, channel
 * token and inner record (results.data.data) to mysqlSave. Returns nothing.
 */
var promiseSaveData = function (results) {
    console.log("promiseSaveData"+results);
    var record = results.data.data;
    mysqlSave(results.link, results.channelId, results.channelToken, record);
};
/*
 Promise helpers end
 */
/**
 * Process feed items strictly one after another: build the record, look up
 * the image size, then save. A failure on one item is logged and does not
 * stop the remaining items.
 *
 * @param {Array} items - feed items, already in processing order
 * @param {string} link - feed URL the items came from
 * @param {number|string} channelId
 * @param {string} channelToken
 * @returns {Promise} settles (never rejects) once every item was attempted
 */
function fetchingAll(items, link, channelId, channelToken) {
    return items.reduce(function (chain, item) {
        return chain
            .then(function () {
                return promiseGetData(item, link, channelId, channelToken);
            })
            .then(function (datas) {
                return promiseGetImage(datas);
            })
            .then(function (results) {
                console.log(results);
                promiseSaveData(results);
            })
            .catch(function (err) {
                // promiseGetImage rejects with the data bundle (not an Error)
                // when no image size could be obtained.
                var reason = err instanceof Error ? err.message : "image unavailable";
                console.log("fetchingAll: skipped item from " + link + " : " + reason);
            });
    }, Promise.resolve());
}
- // promise end
// Route every "store" event to store(); the returned record is discarded.
emitter.on('store', function onStore(feedItem, feedLink, feedChannelId, feedChannelToken) {
    store(feedItem, feedLink, feedChannelId, feedChannelToken);
});
/**
 * Safely read a nested value; returns undefined as soon as a step is missing.
 */
function digFeedValue(root, path) {
    var node = root;
    for (var i = 0; i < path.length; i++) {
        if (node == null) {
            return undefined;
        }
        node = node[path[i]];
    }
    return node;
}

/**
 * A candidate counts as an image link when it is a string of at least 5 chars.
 */
function isUsableImageLink(value) {
    return typeof value === "string" && value.length >= 5;
}

/**
 * Return the first usable image URL found on a feed item, trying the sources
 * from most to least specific. The article link is only the last resort, so
 * it no longer hides enclosures, rss:image, meta or media:content. Returns
 * the placeholder "a" when nothing at all is available.
 */
function pickFeedImageLink(item) {
    var mediaContent = item["media:content"];
    if (Array.isArray(mediaContent)) {
        mediaContent = mediaContent[0];
    }
    var sources = [
        function () { return digFeedValue(item, ["image", "url"]); },
        function () { return typeof item.description === "string" ? getImage(item.description) : ""; },
        function () { return digFeedValue(item, ["enclosures", 0, "url"]); },
        function () { return digFeedValue(item, ["rss:image", "url", "#"]); },
        function () { return digFeedValue(item, ["meta", "image", "url"]); },
        // Items are plain objects here; element attributes are read from the
        // "@" key rather than through a DOM-style getAttribute().
        function () { return digFeedValue(mediaContent, ["@", "url"]); },
        function () {
            var encoded = digFeedValue(item, ["content:encoded", "#"]);
            return typeof encoded === "string" ? getImage(encoded) : "";
        },
        function () { return item.link; }
    ];
    for (var i = 0; i < sources.length; i++) {
        var candidate = sources[i]();
        if (isUsableImageLink(candidate)) {
            return candidate;
        }
    }
    return "a";
}

/**
 * Strip CDATA wrappers, quote characters, "&quot;" entities and any markup
 * from an image link. Every occurrence is removed, not just the first.
 */
function sanitizeImageLink(imagelink) {
    return striptags(
        imagelink
            .replace(/<!\[CDATA\[/g, "")
            .replace(/\]\]>/g, "")
            .replace(/&quot;/g, "")
            .replace(/['"]/g, "")
    );
}

/**
 * Convert one parsed feed item into the record that is written to the `feed`
 * table.
 *
 * @param {Object} item - parsed feed item (title, description, link, date, ...)
 * @param {string} link - URL of the feed the item came from
 * @param {number|string} channelId
 * @param {string} channelToken
 * @returns {{link: string, channelId: *, channelToken: string, data: Object}}
 *   `data` holds title, tag-stripped description, link, imagelink,
 *   imagewidth/imageheight (always 0 here), createdDate as
 *   "YYYY-MM-DD HH:MM:SS" in UTC, channelid, username, linkhash (hex SHA-256
 *   of the item link) and active = 1.
 */
function store(item, link, channelId, channelToken) {
    var linkItem = item.link;

    // A missing or unparseable date falls back to "now" instead of making
    // toISOString() throw a RangeError.
    var published = item.date == null ? new Date() : new Date(item.date);
    if (isNaN(published.getTime())) {
        published = new Date();
    }
    var createdDate = published.toISOString().slice(0, 19).replace('T', ' ');

    var linkhash = crypto.createHash('sha256')
        .update(String(linkItem == null ? "" : linkItem))
        .digest('hex');

    var imagelink = pickFeedImageLink(item);
    if (imagelink.length > 5) {
        imagelink = sanitizeImageLink(imagelink);
    }

    var data = {
        title: item.title,
        description: striptags(item.description),
        link: linkItem,
        imagelink: imagelink,
        imagewidth: 0,
        imageheight: 0,
        createdDate: createdDate,
        channelid: channelId,
        username: item.author || "",
        linkhash: linkhash,
        active: 1
    };
    return {link: link, channelId: channelId, channelToken: channelToken, data: data};
}
// Route every "mysql-save" event to mysqlSave().
emitter.on('mysql-save', function onMysqlSave(feedLink, feedChannelId, feedChannelToken, record) {
    mysqlSave(feedLink, feedChannelId, feedChannelToken, record);
});
/**
 * Insert one record into the `feed` table unless a row with the same
 * channel id and link hash already exists (INSERT ... WHERE NOT EXISTS).
 * A query error is logged and posted to Slack; nothing is returned.
 *
 * `link` and `channelToken` are accepted for signature compatibility and
 * are not used by the query.
 */
function mysqlSave(link, channelId, channelToken, data) {
    var insertIfNew = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';
    var params = [
        data.title,
        data.description,
        data.link,
        data.imagelink,
        data.imagewidth,
        data.imageheight,
        channelId,
        1,
        data.linkhash,
        data.username,
        data.createdDate,
        // The last two feed the NOT EXISTS duplicate check.
        channelId,
        data.linkhash
    ];
    connection.query(insertIfNew, params, function (queryErr) {
        var summary = data.title + " " + data.link + " " + data.description;
        if (!queryErr) {
            return;
        }
        console.log("error dibagian:" + queryErr);
        slack.webhook({
            text: summary + queryErr.toString()
        }, function () {
            // Slack delivery result is intentionally ignored.
        });
    });
}
/**
 * Extract the first image URL from an HTML fragment.
 *
 * Looks for a self-closing <img ... src=... /> tag first. If none is found,
 * or the match looks like a script (".js"), it falls back to the first
 * `src=` attribute anywhere in the fragment.
 *
 * @param {*} string - HTML fragment; anything that is not a string yields ""
 * @returns {string} the URL, or "" when nothing was found (never undefined)
 */
function getImage(string) {
    if (typeof string !== "string") {
        return "";
    }
    // Remove every CDATA / <figure> wrapper, not only the first one.
    // Newlines and tabs become a space rather than nothing, so that
    // "<img\nsrc=..." is not glued into "<imgsrc=...".
    var html = string
        .replace(/<!\[CDATA\[/g, "")
        .replace(/\]\]>/g, "")
        .replace(/CDATA/g, "")
        .replace(/<\/?figure>/g, "")
        .replace(/[\n\t]/g, " ");

    var imgTag = /<img[^>]+src="?([^"\s]+)"?[^>]*\/>/;
    var anySrc = /\ssrc=(?:(?:'([^']*)')|(?:"([^"]*)")|([^\s]*))/i; // src='a' OR src="a" OR src=a

    var img = "";
    var tagMatch = imgTag.exec(html);
    if (tagMatch) {
        img = tagMatch[1];
    }
    // The dot is escaped so only a literal ".js" triggers the fallback.
    if (!tagMatch || /\.js/.test(img)) {
        var srcMatch = html.match(anySrc);
        if (srcMatch) {
            img = srcMatch[1] || srcMatch[2] || srcMatch[3] || "";
        }
    }
    return img;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement