Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- var Twitter = require('twitter');
- var sentiment = require('sentiment');
- var sequelize = require('sequelize');
- var natural = require('natural');
- var sw = require('stopword');
// Shared word tokenizer used by parse() to split tweet text into tokens.
var tokenizer = new natural.WordTokenizer();

// Twitter API client.
// Credentials are read from the environment instead of being committed with
// the source. Any key that was ever published alongside this file should be
// treated as compromised and regenerated.
var client = new Twitter({
  consumer_key: process.env.TWITTER_CONSUMER_KEY,
  consumer_secret: process.env.TWITTER_CONSUMER_SECRET,
  access_token_key: process.env.TWITTER_ACCESS_TOKEN_KEY,
  access_token_secret: process.env.TWITTER_ACCESS_TOKEN_SECRET
});

// Bag-of-words accumulator; not referenced by the code visible in this file.
const BoW = {};
/**
 * Pick candidate words out of a tweet and record each one via pushWord().
 *
 * The text is tokenized with the shared tokenizer, passed through
 * sw.removeStopwords(), and then only tokens that are 4 to 6 characters long
 * and are not the URL scheme fragments "http"/"https" are kept.
 *
 * @param {string} tweet - Tweet text to process.
 * @returns {void}
 */
function parse(tweet) {
  // Nothing to tokenize for missing, non-string or empty text.
  if (typeof tweet !== 'string' || tweet.length === 0) {
    return;
  }

  const tokens = tokenizer.tokenize(tweet);
  const words = sw.removeStopwords(tokens);
  const keptWords = words.filter(function (word) {
    return word.length > 3 && word.length < 7 && word !== 'http' && word !== 'https';
  });

  for (const word of keptWords) {
    pushWord(word);
  }
}
// Database connection settings.
// NOTE(review): schema name and credentials are hard-coded local values.
const databaseSchema = 'demo_schema';
const username = 'root';
const password = 'root';

const connection = new sequelize(databaseSchema, username, password, {
  dialect: 'mysql',
  // Character set for the client connection. Setting only this was reported
  // as not being enough to store tweets containing emoji.
  dialectOptions: {
    charset: 'utf8mb4'
  },
  // Default options applied to every model defined on this connection, so
  // that tables are created with a 4-byte-capable character set as well.
  // NOTE(review): tables that already exist presumably keep their old charset
  // under a plain sync() and would need to be altered or recreated — confirm.
  define: {
    charset: 'utf8mb4',
    collate: 'utf8mb4_unicode_ci'
  }
});
// Check the connection once at startup and report the outcome.
connection.authenticate()
  .then(function () {
    console.log("The database is connected!");
  })
  .catch(function (err) {
    // Log the underlying error too, so the cause of the failure is visible.
    console.log("Oops, database connection error!", err);
  });
// The chain ends at .catch(): a trailing .done() is a Bluebird-only call and
// is not available on native promises.
// Model 'tweet': a single string column `tweets`.
// The character set is given as a model (table) option; a `charset` key on the
// column definition is not an attribute option Sequelize reads.
var tweetTable = connection.define('tweet', {
  tweets: {
    type: sequelize.STRING
  }
}, {
  charset: 'utf8mb4',
  collate: 'utf8mb4_unicode_ci'
});
// Model 'word': one row per distinct word, keyed by the word itself, with a
// running occurrence count in `frequencies`.
// The character set is given as a model (table) option; a `charset` key on the
// column definition is not an attribute option Sequelize reads.
// NOTE(review): a utf8mb4 string primary key may exceed the index key length
// limit on some MySQL configurations — confirm against the target server.
var wordTable = connection.define('word', {
  words: {
    type: sequelize.STRING,
    primaryKey: true
  },
  frequencies: {
    type: sequelize.DataTypes.INTEGER
  }
}, {
  charset: 'utf8mb4',
  collate: 'utf8mb4_unicode_ci'
});
// Sample words for manually exercising pushWord().
let testWords = ['happy', 'happy', 'sad', 'hello'];

/**
 * Record one occurrence of a word in the word table.
 *
 * Syncs the models, finds the row for `word` (creating it with a frequency
 * of 0 when absent) and then increments that row's `frequencies` column.
 *
 * @param {string} word - Word to count.
 * @returns {Promise} Settles once the update has finished. Failures are
 *   logged here and not rethrown, so callers that ignore the result do not
 *   leave an unhandled rejection behind.
 */
function pushWord(word) {
  return connection.sync()
    .then(function () {
      return wordTable.findOrCreate({where: {words: word}, defaults: {frequencies: 0}});
    })
    .then(function (result) {
      // findOrCreate resolves with [instance, created]; index the array
      // rather than relying on the Bluebird-only .spread().
      const row = result[0];
      return wordTable.increment('frequencies', {
        where: {words: row.get({plain: true}).words}
      });
    })
    .catch(function (err) {
      console.log('pushWord failed for "' + word + '":', err);
    });
}
// Manual smoke test for pushWord(): walks the sample words, but the call
// itself stays commented out so loading the file does not touch the database.
var i = 0;
while (i < testWords.length) {
  // pushWord(testWords[i]);
  i++;
}

// Sample sentence for manually exercising parse(); the call is left disabled.
let testTweet = 'happy birthday, hope you have a good day';
// parse(testTweet);
// Pattern removed from tweet text before parsing: private-use code points,
// the \u2694-\u2697 symbols and several surrogate-pair (emoji) ranges.
var re = /([\uE000-\uF8FF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDFFF]|[\u2694-\u2697]|\uD83E[\uDD10-\uDD5D])/g;

// Track list made of every single letter; presumably meant to approximate an
// unfiltered sample of the stream.
var firehose = 'a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z';

// Alternative track term; not referenced by the code visible in this file.
var keyword = 'robot';

// Open the filtered stream and feed each English-language tweet to parse().
client.stream('statuses/filter', {track: firehose}, function (stream) {
  stream.on('data', function (tweet) {
    // Skip any message that lacks a user or text (the stream may deliver
    // notices that are not tweets) instead of throwing on the property access.
    if (!tweet || !tweet.user || typeof tweet.text !== 'string') {
      return;
    }
    if (tweet.user.lang !== 'en') {
      return;
    }

    console.log('tweet = ' + tweet.text);
    var cleanTweet = tweet.text.replace(re, '').toLowerCase();
    console.log('cleantweet = ' + cleanTweet);
    parse(cleanTweet);
  });

  stream.on('error', function (error) {
    console.log(error);
  });
});
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement