Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- var fs = require('fs');
- var _ = require('lodash');
- var async = require('async');
- var Twit = require('twit');
- var pg = require('pg').native;
- var colors = require('colors');
- // Database connections - TODO - Use env
/**
 * Ends the crawl: closes the given database client and logs completion.
 *
 * @param {Object} db - database client exposing an `end()` method.
 */
var finished = function (db) {
  db.end();
  // `.blue` is a String getter supplied by the `colors` package required above.
  console.log('Crawl complete'.blue);
};
// Label fragments that indicate a URL scheme ("http://...") rather than a
// genuine "label: text" prefix. Matched as case-insensitive substrings.
var blockedColons = ['http', 'https'];

/**
 * Splits a tweet of the form "label: text" into its label and remainder.
 *
 * The label is everything before the first colon. It may contain only ASCII
 * letters, digits and whitespace and be at most 20 characters long, so the
 * colon has to appear within the first 21 characters of the tweet.
 *
 * @param {string} tweet - raw tweet text.
 * @returns {{colon: string, text: string}|null} the label (`colon`) and the
 *   untrimmed text after the colon, or null when the tweet has no such
 *   prefix or the label contains a blocked fragment.
 */
var parseColon = function (tweet) {
  // `[\s\S]*` instead of `.*`: '.' stops at line terminators and '$' only
  // matches at end of input here, so `.*` rejected every multi-line tweet.
  var colonRegex = /^([\sa-zA-Z0-9]{0,20}):([\s\S]*)$/;
  var match = colonRegex.exec(tweet);
  if (match === null) {
    return null;
  }

  var matchedColon = match[1];
  var text = match[2];

  // Compare in lower case so that "HTTP://..." is rejected like "http://...".
  var loweredColon = matchedColon.toLowerCase();
  var blocked = blockedColons.some(function (block) {
    return loweredColon.indexOf(block) !== -1;
  });

  if (blocked) {
    console.log('Colon is in the blocked list'.yellow, matchedColon);
    return null;
  }

  console.log('Found colon'.green, matchedColon, ':', text);
  return {
    colon: matchedColon,
    text: text
  };
};
/**
 * Inserts the colon-prefixed tweets of one user, one row at a time.
 *
 * Failed inserts are logged and skipped; they never abort the series.
 *
 * @param {Object} db - connected Postgres client.
 * @param {Object} user - row from the `users` table (its `id` is stored).
 * @param {Array} tweets - results of parseColon, each extended with the
 *   original `tweet` object.
 * @param {Function} callback - invoked without arguments when all are done.
 */
function storeColonTweets(db, user, tweets, callback) {
  async.eachSeries(tweets, function (tweet, cb) {
    db.query("INSERT INTO tweets (user_id, colon, text, tweeted_at, tweet_id, tweet_content) VALUES ($1,$2,$3,$4,$5,$6)", [user.id, tweet.colon, tweet.text, tweet.tweet.created_at, tweet.tweet.id_str, null], function (err, result) {
      if (!err) {
        console.log('Added new colon based tweet'.green);
      } else if (err.code === '23505') {
        // SQLSTATE 23505 = unique_violation, i.e. the tweet is already stored.
        // NOTE(review): presumes a unique constraint on tweets - confirm schema.
        console.log('Tweet already exist'.yellow);
      } else {
        // Anything else is a real failure and must not be reported as a duplicate.
        console.error('error inserting tweet', err);
      }
      cb();
    });
  }, function () {
    callback();
  });
}

/**
 * Refreshes one user: updates the stored Twitter profile fields, then loads
 * the timeline and stores every tweet that parseColon recognises.
 *
 * Errors from Twitter are logged and the user is skipped, so a single broken
 * account (e.g. revoked token) does not crash the whole crawl.
 *
 * @param {Object} db - connected Postgres client.
 * @param {Object} user - row from the `users` table; `screen_name`,
 *   `access_token`, `access_token_secret` and `id` are read.
 * @param {Function} callback - invoked without arguments when the user is done.
 */
function crawlUser(db, user, callback) {
  console.log('Loading data for'.green, user.screen_name);
  var T = new Twit({
    consumer_key: "",
    consumer_secret: "",
    access_token: user.access_token,
    access_token_secret: user.access_token_secret
  });

  T.get('users/show', {
    screen_name: user.screen_name
  }, function (err, twitter_user, response) {
    if (err || !twitter_user) {
      console.error('error loading Twitter profile for', user.screen_name, err);
      return callback();
    }

    // Dropping the "_normal" suffix from the avatar URL; guarded because the
    // field is not guaranteed to be a string.
    var picture = typeof twitter_user.profile_image_url_https === 'string' ?
      twitter_user.profile_image_url_https.replace('_normal', '') :
      null;

    db.query("UPDATE users SET twitter_name=$1, twitter_location=$2, twitter_description=$3, twitter_picture=$4, twitter_followers=$5 WHERE screen_name=$6", [twitter_user.name, twitter_user.location, twitter_user.description, picture, twitter_user.followers_count, user.screen_name], function (err, result) {
      if (err) {
        console.error('error updating user bio', err);
      } else {
        console.log('User bio updated'.green);
      }

      // No user is named in the request; it runs with this user's own tokens.
      T.get('statuses/user_timeline', {
        count: 200,
        include_rts: false,
        exclude_replies: false
      }, function (err, data, response) {
        if (err) {
          console.error('error loading timeline for', user.screen_name, err);
          return callback();
        }

        // Keep only tweets with a recognised "label: text" prefix and attach
        // the original tweet object to each parsed result.
        var tweets = _.compact(_.map(data, function (tweet) {
          var colon = parseColon(tweet.text);
          if (colon !== null) {
            colon.tweet = tweet;
          }
          return colon;
        }));

        storeColonTweets(db, user, tweets, callback);
      });
    });
  });
}

// Entry point: connect to Postgres, select the users that are due for a
// check and crawl them one after another.
// NOTE(review): `conString` is not defined in the visible part of this file.
// NOTE(review): `done` is never invoked; the client is closed via finished().
// NOTE(review): nothing here updates `last_checked`, so selected users stay
// selected on the next run - confirm whether that is intended.
pg.connect(conString, function (err, db, done) {
  if (err) {
    return console.error('error fetching client from pool', err);
  }
  console.log('Postgres connected');

  // Milliseconds since the epoch.
  // NOTE(review): the threshold below is -60; confirm the unit of `last_checked`.
  var currentTime = new Date().getTime();

  // Find outdated users
  db.query("SELECT * FROM users WHERE last_checked-" + currentTime + " < -60", function (err, result) {
    if (err) {
      console.error('error querying outdated users', err);
      return finished(db);
    }

    var users = result.rows;
    if (users.length === 0) {
      return finished(db);
    }

    console.log('Found users who have to be checked'.green, users.length);
    async.eachSeries(users, function (user, callback) {
      crawlUser(db, user, callback);
    }, function () {
      finished(db);
    });
  });
});
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement