Advertisement
Guest User

Untitled

a guest
Oct 19th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.20 KB | None | 0 0
  1. var Twitter = require('twitter');
  2. var sentiment = require('sentiment');
  3. var sequelize = require('sequelize');
  4. var natural = require('natural');
  5. var sw = require('stopword');
  6.  
  // Shared tokenizer used by parse() to split tweet text into word tokens.
  var tokenizer = new natural.WordTokenizer();

  /**
   * Reads a required setting from the environment.
   *
   * Failing fast here produces a clear start-up message instead of an opaque
   * authentication error from the API later on.
   *
   * @param {string} name - Environment variable name.
   * @returns {string} The variable's non-empty value.
   * @throws {Error} When the variable is unset or empty.
   */
  function requireEnv(name) {
    var value = process.env[name];
    if (!value) {
      throw new Error('Missing required environment variable: ' + name);
    }
    return value;
  }

  // Twitter API client. Credentials are read from the environment so they never
  // live in source control. Any key that was previously hard-coded in this file
  // has been exposed and must be revoked and regenerated.
  var client = new Twitter({
    consumer_key: requireEnv('TWITTER_CONSUMER_KEY'),
    consumer_secret: requireEnv('TWITTER_CONSUMER_SECRET'),
    access_token_key: requireEnv('TWITTER_ACCESS_TOKEN_KEY'),
    access_token_secret: requireEnv('TWITTER_ACCESS_TOKEN_SECRET')
  });

  // Bag-of-words accumulator. Declared here but not written to by any code in this file.
  const BoW = {};
  17.  
  // URL-scheme fragments left behind once the tokenizer splits links apart.
  const IGNORED_TOKENS = ['http', 'https'];

  /**
   * Tokenizes a tweet, removes stopwords, and records every remaining word of
   * 4 to 6 characters through pushWord().
   *
   * @param {string} tweet - Tweet text to process.
   * @returns {void}
   */
  function parse(tweet) {
    // Skip input the tokenizer cannot handle rather than throwing mid-stream.
    if (typeof tweet !== 'string' || tweet.length === 0) {
      return;
    }

    var tokens = tokenizer.tokenize(tweet);
    var words = sw.removeStopwords(tokens);

    words
      .filter(function (word) {
        return word.length > 3 &&
          word.length < 7 &&
          IGNORED_TOKENS.indexOf(word) === -1;
      })
      .forEach(function (word) {
        pushWord(word);
      });
  }
  33.  
  34.  
  // Database settings. Each value can be overridden through the environment so
  // real credentials stay out of source control; the fallbacks are the local
  // demo values this script has always used.
  const databaseSchema = process.env.DB_SCHEMA || 'demo_schema';
  const username = process.env.DB_USER || 'root';
  // An empty password is a legitimate value, so only fall back when it is unset.
  const password = process.env.DB_PASSWORD !== undefined ? process.env.DB_PASSWORD : 'root';
  const connection = new sequelize(databaseSchema, username, password, {
    dialect: 'mysql',
    // This sets the charset of the client connection only. For emoji (4-byte
    // characters) to be stored, the target table/column must ALSO use utf8mb4;
    // in Sequelize that is the model-level `charset` option, and tables that
    // already exist with another charset have to be converted in MySQL.
    dialectOptions: {
      charset: 'utf8mb4'
    }
  });
  45.  
  // Check the connection at start-up so bad credentials or an unreachable
  // server are reported immediately.
  connection.authenticate()
    .then(function () {
      console.log("The database is connected!");
    })
    .catch(function (err) {
      // Report the underlying error: without it a wrong password, a wrong host
      // and a missing schema all look the same.
      console.error("Oops, database connection error!", err);
    });
  55.  
  56.  
  // Model for stored tweets.
  // The charset belongs in the model options (third argument): Sequelize does
  // not recognise `charset` as a column attribute option, so placing it on the
  // column had no effect on the generated table. sync() only creates missing
  // tables, so a table that already exists keeps its old charset until it is
  // converted in MySQL.
  var tweetTable = connection.define('tweet', {
    tweets: {
      type: sequelize.STRING
    }
  }, {
    charset: 'utf8mb4',
    collate: 'utf8mb4_unicode_ci'
  });
  63.  
  // Model for per-word counts: `words` is the primary key, `frequencies` the
  // running count that pushWord() increments.
  // The charset is given in the model options (third argument) because
  // Sequelize does not recognise `charset` as a column attribute option.
  // NOTE(review): a utf8mb4 VARCHAR(255) primary key can exceed InnoDB's index
  // key limit on older MySQL row formats - confirm against the target server.
  var wordTable = connection.define('word', {
    words: {
      type: sequelize.STRING,
      primaryKey: true
    },
    frequencies: {
      type: sequelize.INTEGER
    }
  }, {
    charset: 'utf8mb4',
    collate: 'utf8mb4_unicode_ci'
  });
  74.  
  75.  
  // Sample input for exercising pushWord() by hand; 'happy' appears twice on
  // purpose so a repeated word is included.
  let testWords = [
    'happy',
    'happy',
    'sad',
    'hello'
  ];
  77.  
  // Cached promise for the one-time schema sync. It is null before first use
  // and is reset to null when a sync attempt fails, so a later call retries.
  var schemaReady = null;

  /**
   * Makes sure the tables exist, calling connection.sync() at most once for as
   * long as that call succeeds.
   *
   * @returns {Promise} Settles when the sync has finished.
   */
  function ensureSchema() {
    if (schemaReady === null) {
      schemaReady = connection.sync().catch(function (err) {
        schemaReady = null;
        throw err;
      });
    }
    return schemaReady;
  }

  /**
   * Records one occurrence of a word: creates its row with a count of 0 when
   * it is missing, then increments `frequencies` by one.
   *
   * Failures are logged rather than rethrown because existing callers invoke
   * this fire-and-forget; the returned promise therefore always resolves.
   *
   * @param {string} word - Word to count.
   * @returns {Promise} Resolves once the update has been attempted.
   */
  function pushWord(word) {
    return ensureSchema()
      .then(function () {
        return wordTable.findOrCreate({where: {words: word}, defaults: {frequencies: 0}});
      })
      .then(function (result) {
        // findOrCreate yields [instance, created]; only the instance is needed.
        var row = result[0];
        return wordTable.increment('frequencies', {
          where: {words: row.get({plain: true}).words}
        });
      })
      .catch(function (err) {
        console.error('Failed to record word "' + word + '":', err);
      });
  }
  89.  
  let testTweet = 'happy birthday, hope you have a good day';

  // Manual smoke test, disabled by default. Start the script with
  // RUN_SMOKE_TEST=1 to push the sample words and parse the sample tweet,
  // instead of commenting calls in and out of the source.
  if (process.env.RUN_SMOKE_TEST === '1') {
    testWords.forEach(function (word) {
      pushWord(word);
    });
    parse(testTweet);
  }
  97.  
  98.  
  // Characters removed from tweet text before tokenizing: the private-use range
  // U+E000-U+F8FF, surrogate pairs led by \uD83C or \uD83D, U+2694-U+2697, and
  // pairs led by \uD83E with a trail unit in \uDD10-\uDD5D.
  const re = /([\uE000-\uF8FF]|\uD83C[\uDC00-\uDFFF]|\uD83D[\uDC00-\uDFFF]|[\u2694-\u2697]|\uD83E[\uDD10-\uDD5D])/g;

  // Track list for the streaming filter: every lowercase Latin letter,
  // separated by a comma and a space.
  const firehose = 'abcdefghijklmnopqrstuvwxyz'.split('').join(', ');

  // Single-keyword alternative to `firehose`; not referenced elsewhere in this file.
  const keyword = 'robot';
  /**
   * Handles one message from the filter stream: logs the tweet, strips the
   * characters matched by `re`, lower-cases the text and passes it to parse().
   *
   * @param {Object} tweet - Message object emitted by the stream.
   * @returns {void}
   */
  function handleStreamMessage(tweet) {
    // Not every stream message has to be a tweet (the streaming API can
    // interleave notices that carry no `user` or `text`); skip those rather
    // than throwing inside the event handler.
    if (!tweet || !tweet.user || typeof tweet.text !== 'string') {
      return;
    }
    if (tweet.user.lang !== 'en') {
      return;
    }

    console.log('tweet = ' + tweet.text);
    var cleanTweet = tweet.text.replace(re, '').toLowerCase();
    console.log('cleantweet = ' + cleanTweet);
    parse(cleanTweet);
  }

  // Open the filtered stream and feed every message to the handler above.
  client.stream('statuses/filter', {track: firehose}, function (stream) {
    stream.on('data', handleStreamMessage);

    stream.on('error', function (error) {
      console.log(error);
    });
  });
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement