Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Created by ASUS-PC on 11/8/2016.
- */
- var FeedParser = require('feedparser'),
- request = require('request'),
- http = require('http'),
- https = require('https'),
- striptags = require('striptags'),
- sizeOf = require('image-size'),
- size = require('request-image-size'),
- Slack = require('slack-node'),
- mysql = require('mysql'),
- crypto = require('crypto'),
- Emitter = require('tiny-emitter'),
- errors = []
- ;
// Event bus used to decouple channel discovery from feed fetching.
var emitter = new Emitter();

// Slack incoming-webhook client used for error alerts.
// The webhook URL is a credential, so it is read from the environment
// instead of being kept in the source. When SLACK_WEBHOOK_URI is not set
// the URI stays empty, exactly as before.
var webhookUri = process.env.SLACK_WEBHOOK_URI || "";
var slack = new Slack();
slack.setWebhook(webhookUri);

// MySQL connection. User and password default to the previous hard-coded
// values and can be overridden with DB_USER / DB_PASSWORD so that no
// credentials need to live in this file.
var connection = mysql.createConnection({
    host: "localhost",
    user: process.env.DB_USER || "root",
    password: process.env.DB_PASSWORD || '',
    database: "intrafeed",
    charset: 'utf8mb4'
});
connection.connect(function (err) {
    if (err) {
        // Include the cause so a failed start-up can actually be diagnosed.
        console.log('Error connecting to Db', err);
        return;
    }
    console.log('Connection established');
});

// Endpoint that returns the list of RSS channels to poll.
var intrafeedLinks = "https://intrafeed.co:9000/apit/rssChannels";

/*
 End settings and params
 */

// Poll once at start-up, then again every 5 minutes.
start();
setInterval(function () {
    start();
}, 5 * 60 * 1000);
/**
 * Loads the channel list from `intrafeedLinks` and emits one "fetch" event
 * per channel with (link, channelid, token).
 *
 * Failures (transport error, non-200 status, response without a `channels`
 * array) are logged and the poll is skipped; nothing is thrown to the caller.
 */
function start() {
    request({url: intrafeedLinks, json: true}, function (error, response, body) {
        if (error) {
            console.log("Error loading channel list from " + intrafeedLinks + ": " + error);
            return;
        }
        if (response.statusCode !== 200) {
            console.log("Channel list request returned status " + response.statusCode);
            return;
        }
        console.log(body); // Parsed JSON channel list.
        var channels = body && body.channels;
        if (!Array.isArray(channels)) {
            console.log("Channel list response has no channels array");
            return;
        }
        // forEach keeps the loop counter local instead of leaking a global `i`.
        channels.forEach(function (channel) {
            emitter.emit('fetch', channel.link, channel.channelid, channel.token);
        });
    });
}
// Every "fetch" event carries one channel; hand it straight to the fetcher.
emitter.on("fetch", function onFetchEvent(feedLink, feedChannelId, feedToken) {
    fetch(feedLink, feedChannelId, feedToken);
});
/**
 * Bumps the per-channel error counter held in `errors`, and once it reaches
 * 10 sends `txt` to Slack and resets the counter to 0.
 *
 * @param {*} channelId - key into the `errors` counter table
 * @param {string} txt - message sent to Slack when the threshold is hit
 */
function recordFeedError(channelId, txt) {
    errors[channelId] = (errors[channelId] || 0) + 1;
    if (errors[channelId] >= 10) {
        slack.webhook({
            text: txt
        }, function (errs) {
            if (errs) {
                console.log("Slack notification failed: " + errs);
            }
        });
        errors[channelId] = 0;
    }
    console.log("errors counter : " + channelId + " " + errors[channelId]);
}

/**
 * Downloads one RSS/Atom feed, parses it, and passes its items - sorted
 * oldest first - to `fetchingAll`.
 *
 * @param {string} link - feed URL
 * @param {*} channelId - channel identifier, forwarded to `fetchingAll`
 * @param {*} channelToken - channel token, forwarded to `fetchingAll`
 */
function fetch(link, channelId, channelToken) {
    var req = request(link);
    var feedparser = new FeedParser({addmeta: false, resume_saxerror: false});
    var itemArray = [];

    req.on('error', function (error) {
        // Previously ignored; at least leave a trace of transport failures.
        console.log("Error requesting " + link + " channelid :" + channelId + " error result: " + error);
    });

    req.on('response', function (res) {
        if (res.statusCode !== 200) {
            // The original message appended an undefined `err` variable here,
            // which threw a ReferenceError on every non-200 response.
            var txt = "Error Request status Code " + res.statusCode + " link : " + link + " channelid :" + channelId;
            recordFeedError(channelId, txt);
            return this.emit('error', new Error('Bad status code'));
        }
        // `this` is the request stream.
        this.pipe(feedparser);
    });

    feedparser.on('error', function (err) {
        var txt = "Error in feed parser module " + link + " channelid :" + channelId + " error result: " + err;
        console.log(txt);
        recordFeedError(channelId, txt);
    });

    feedparser.on('readable', function () {
        var item;
        while ((item = this.read())) {
            if (item.pubDate == null) {
                item.pubDate = new Date().toString();
            }
            // Normalise the date to epoch milliseconds. Missing or unparsable
            // dates fall back to "now" so sorting and later formatting never
            // see NaN.
            var timestamp = item.date == null ? NaN : new Date(item.date).getTime();
            item.date = isNaN(timestamp) ? Date.now() : timestamp;
            itemArray.push(item);
        }
    });

    feedparser.on("end", function () {
        console.log(" ++++++++" + link + " total : " + itemArray.length + "+++++++++");
        // Sort a copy, oldest first.
        var byDate = itemArray.slice(0).sort(function (a, b) {
            return a.date - b.date;
        });
        fetchingAll(byDate, link, channelId, channelToken);
    });
}
/**
 * Looks up the pixel dimensions of an image with the module-level `size`
 * helper.
 *
 * @param {string} imagelink - image URL to probe
 * @returns {Promise<Object|null>} resolves with the dimensions object, or
 *     null when the probe fails; never rejects (the lookup is best-effort)
 */
function measureImage(imagelink) {
    return new Promise(function (resolve) {
        try {
            size(imagelink, function (err, dimensions) {
                resolve(err || !dimensions ? null : dimensions);
            });
        } catch (probeError) {
            resolve(null);
        }
    });
}

/**
 * Processes feed items strictly one after another: builds the row with
 * `store`, waits for the image dimensions, then saves with `mysqlSave`.
 *
 * The save used to be issued before the asynchronous size callback had run,
 * so imagewidth/imageheight never reached the database. The chain now waits
 * for the probe before saving.
 *
 * A failure on one item is logged and does not stop the following items.
 *
 * @param {Array} item - feed items, in the order they should be saved
 * @param {string} link - feed URL
 * @param {*} channelId - channel identifier
 * @param {*} channelToken - channel token
 * @returns {Promise} settles once every item has been handled
 */
function fetchingAll(item, link, channelId, channelToken) {
    var sequence = Promise.resolve();
    (item || []).forEach(function (feedItem) {
        sequence = sequence.then(function () {
            return store(feedItem, link, channelId, channelToken);
        }).then(function (stored) {
            return measureImage(stored.data.imagelink).then(function (dimensions) {
                if (dimensions) {
                    stored.data.imagewidth = dimensions.width;
                    stored.data.imageheight = dimensions.height;
                    console.log("insert image size, width:" + stored.data.imagewidth + " | height: " + stored.data.imageheight);
                }
                return mysqlSave(stored.link, stored.channelId, stored.channelToken, stored.data);
            });
        }).catch(function (err) {
            console.log(err + ' failed to load!');
        });
    });
    return sequence;
}
- // promise end
// A "store" event carries one parsed feed item plus its channel context;
// pass all four values on to store() unchanged.
emitter.on('store', function onStoreEvent(feedItem, feedLink, feedChannelId, feedToken) {
    store(feedItem, feedLink, feedChannelId, feedToken);
});
/** True when `obj` is non-null and has `key` as an own property. */
function hasOwn(obj, key) {
    return obj != null && Object.prototype.hasOwnProperty.call(obj, key);
}

/** A candidate counts as an image link only if it is a string of 5+ characters. */
function isUsableLink(candidate) {
    return typeof candidate === "string" && candidate.length >= 5;
}

/**
 * Formats a date value as "YYYY-MM-DD HH:MM:SS" (UTC). Missing or
 * unparsable values fall back to the current time instead of throwing.
 */
function toMysqlDateTime(value) {
    var parsed = value == null ? new Date() : new Date(value);
    if (isNaN(parsed.getTime())) {
        parsed = new Date();
    }
    return parsed.toISOString().slice(0, 19).replace('T', ' ');
}

/** Strips CDATA markers, quotes, and markup from an image link. */
function sanitizeImageLink(raw) {
    var cleaned = String(raw)
        .replace(/<!\[CDATA\[/g, "")
        .replace(/\]\]>/g, "")
        // NOTE(review): the source had an unparsable `"""` literal here,
        // presumably an entity-decoded "&quot;" - confirm.
        .replace(/&quot;/g, "")
        .replace(/['"]/g, "");
    return striptags(cleaned);
}

/**
 * Picks the best available image URL for a feed item. Sources are tried in
 * order and the first usable one wins; the article link is the last resort,
 * and "a" is the placeholder when even that is missing.
 */
function pickImageLink(item) {
    var finders = [
        function fromItemImage() {
            return hasOwn(item.image, "url") ? item.image.url : null;
        },
        function fromDescription() {
            return typeof item.description === "string" ? getImage(item.description) : null;
        },
        function fromEnclosures() {
            var first = item.enclosures && item.enclosures[0];
            return hasOwn(first, "url") ? first.url : null;
        },
        function fromRssImage() {
            var rssImage = item["rss:image"];
            var url = hasOwn(rssImage, "url") ? rssImage.url : null;
            // Use the text content, not the element object.
            return hasOwn(url, "#") ? url["#"] : null;
        },
        function fromMeta() {
            var metaImage = item.meta && item.meta.image;
            return hasOwn(metaImage, "url") ? metaImage.url : null;
        },
        function fromMediaContent() {
            // Parsed items are plain objects, so there is no getAttribute().
            // NOTE(review): assumes feedparser exposes element attributes
            // under "@" - confirm against the feedparser README.
            var media = item["media:content"];
            if (Array.isArray(media)) {
                media = media[0];
            }
            return media != null && media["@"] != null ? media["@"].url : null;
        },
        function fromContentEncoded() {
            var encoded = item["content:encoded"];
            var html = hasOwn(encoded, "#") ? encoded["#"] : null;
            return typeof html === "string" ? getImage(html) : null;
        }
    ];
    for (var i = 0; i < finders.length; i++) {
        var candidate = finders[i]();
        if (isUsableLink(candidate)) {
            return candidate;
        }
    }
    return item.link != null ? item.link : "a";
}

/**
 * Turns one parsed feed item into the row saved to the `feed` table.
 *
 * Compared with the previous version:
 *  - the article link is now the LAST image fallback. It used to come third,
 *    which made the enclosure, rss:image, meta, media:content and
 *    content:encoded lookups unreachable;
 *  - the image link is sanitised before the row is built, so the returned
 *    data carries the cleaned value;
 *  - the asynchronous image-size probe is gone. Its callback ran after this
 *    function had returned, so its result could never reach the caller.
 *    imagewidth/imageheight are returned as 0, as before;
 *  - an invalid date no longer throws.
 *
 * @param {Object} item - parsed feed item; `item.link` must be a string
 * @param {string} link - feed URL the item came from
 * @param {*} channelId - channel identifier
 * @param {*} channelToken - channel token
 * @returns {{link: string, channelId: *, channelToken: *, data: Object}}
 */
function store(item, link, channelId, channelToken) {
    var linkItem = item.link;
    var description = item.description;
    // 'hex' is the documented encoding; 'sha256' was not a valid digest encoding.
    var linkhash = crypto.createHash('sha256').update(linkItem).digest('hex');
    var imagelink = sanitizeImageLink(pickImageLink(item));

    var data = {
        title: item.title,
        description: striptags(description),
        link: linkItem,
        imagelink: imagelink,
        imagewidth: 0,
        imageheight: 0,
        createdDate: toMysqlDateTime(item.date),
        channelid: channelId,
        username: item.author || "",
        linkhash: linkhash,
        active: 1
    };

    return {link: link, channelId: channelId, channelToken: channelToken, data: data};
}
// "mysql-save" events carry a ready-made row plus its channel context;
// persist it through mysqlSave().
emitter.on('mysql-save', function onMysqlSaveEvent(feedLink, feedChannelId, feedToken, row) {
    mysqlSave(feedLink, feedChannelId, feedToken, row);
});
/**
 * Inserts one feed row unless a row with the same channel and link hash
 * already exists (INSERT ... WHERE NOT EXISTS).
 *
 * Query errors are logged and reported to Slack; they do not reject, so
 * saving stays best-effort for callers that ignore the return value.
 *
 * The image-size lookup that used to follow the insert has been removed:
 * its result was never used, so it only cost one extra download per row.
 *
 * @param {string} link - feed URL (not used by the query)
 * @param {*} channelId - channel the row belongs to
 * @param {*} channelToken - channel token (not used by the query)
 * @param {Object} data - row values: title, description, link, imagelink,
 *     imagewidth, imageheight, linkhash, username, createdDate
 * @returns {Promise<Object|null>} resolves with the query result once the
 *     insert has completed, or with null when the query failed
 */
function mysqlSave(link, channelId, channelToken, data) {
    var sql = ' INSERT INTO feed(title,description,link,imagelink,imagewidth,imageheight,channelid,active,linkhash,username,createdDate) SELECT * FROM (SELECT ? AS f1,? AS f2, ? AS f3,? AS f4,? AS f5,? AS f6,? AS f7,? AS f8, ? AS f9, ? as f10, ? as f11) AS tmp WHERE NOT EXISTS (SELECT id FROM feed WHERE channelid= ? AND linkhash= ?) LIMIT 1';
    // Eleven column values followed by the two values for the NOT EXISTS check.
    var arrayValue = [data.title, data.description, data.link, data.imagelink, data.imagewidth, data.imageheight, channelId, 1, data.linkhash, data.username, data.createdDate, channelId, data.linkhash];

    return new Promise(function (resolve) {
        connection.query(sql, arrayValue, function (errs, resulr) {
            if (errs) {
                var txt = data.title + " " + data.link + " " + data.description;
                console.log("error dibagian:" + errs);
                slack.webhook({
                    text: txt + errs.toString()
                }, function (slackErr) {
                    if (slackErr) {
                        console.log("Slack notification failed: " + slackErr);
                    }
                });
                resolve(null);
                return;
            }
            resolve(resulr);
        });
    });
}
/**
 * Extracts an image URL from an HTML fragment.
 *
 * Strategy: take the `src` of the first <img> tag; if there is none, or its
 * src points at a script, fall back to the first `src` attribute anywhere
 * in the fragment.
 *
 * Fixes over the previous version:
 *  - non-string input returns "" instead of throwing;
 *  - an empty src returns "" instead of undefined;
 *  - <img> tags no longer need to be self-closing or double-quoted;
 *  - `data-src` is no longer mistaken for `src`;
 *  - every CDATA marker, <figure> tag, newline and tab is handled, not only
 *    the first. Newlines and tabs become spaces so `<img\nsrc=...>` still
 *    has a separator before `src`;
 *  - the script check matches a real ".js" extension, not any "js".
 *
 * @param {*} string - HTML fragment (anything that is not a string yields "")
 * @returns {string} the extracted URL, or "" when nothing was found
 */
function getImage(string) {
    if (typeof string !== "string") {
        return "";
    }
    var html = string
        .replace(/<!\[CDATA\[|\]\]>|CDATA/g, "")
        .replace(/<\/?figure>/g, "")
        .replace(/[\n\t]/g, " ");

    // First real src attribute inside the first <img> tag that has one.
    var imgSrc = /<img\b[^>]*?\ssrc=["']?([^"'\s>]+)/i;
    // Any src attribute: src='a' OR src="a" OR src=a
    var anySrc = /\ssrc=(?:'([^']*)'|"([^"]*)"|([^\s>]*))/i;
    var scriptUrl = /\.js(?:[?#]|$)/i;

    var imgMatch = imgSrc.exec(html);
    if (imgMatch && !scriptUrl.test(imgMatch[1])) {
        return imgMatch[1];
    }

    var srcMatch = anySrc.exec(html);
    if (srcMatch) {
        // Whichever alternative matched; "" when the attribute is empty.
        return srcMatch[1] || srcMatch[2] || srcMatch[3] || "";
    }
    return imgMatch ? imgMatch[1] : "";
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement