Advertisement
Guest User

Untitled

a guest
May 25th, 2015
228
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.10 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import os
  4. import sys
  5. from optparse import OptionParser
  6. import tweepy
  7. import time
  8. import logging
  9. logger = logging.getLogger(__name__)
  10.  
  class Streamer(tweepy.StreamListener):
      """Stream listener that records users and tweets per hashtag in Redis.

      Statuses whose author passes ``is_good`` are pushed onto Redis lists
      keyed by each hashtag found in the status.  At most once every
      ``RECHECK_INTERVAL`` seconds the listener compares the hashtag names it
      was started with against the ones currently in the database; once they
      differ, its callbacks return ``False``.

      NOTE(review): per the tweepy documentation a ``False`` return from a
      listener callback disconnects the stream -- confirm for the tweepy
      version in use.
      """

      # Minimum number of seconds between two hashtag re-checks.
      RECHECK_INTERVAL = 120

      def __init__(self, *args, **kwargs):
          """Create the listener.

          Keyword arguments consumed here (everything else is forwarded to
          ``tweepy.StreamListener``):

          hashtags    -- iterable of objects with a ``name`` attribute (or of
                         plain names) describing what the stream tracks.
                         Defaults to an empty tuple.
          redis_cache -- object exposing ``lpush(key, value)``.  Defaults to
                         ``None``, in which case nothing is cached.
          """
          # Defaults keep both attributes defined even when a kwarg is
          # omitted, so later attribute access cannot raise AttributeError.
          self.hashtags = kwargs.pop('hashtags', ())
          self.redis_cache = kwargs.pop('redis_cache', None)
          # Snapshot the tracked names once so should_continue() compares
          # names with names rather than names with model instances.
          self._tracked_names = set(
              getattr(tag, 'name', tag) for tag in self.hashtags
          )
          self.checker = time.time()
          super(Streamer, self).__init__(*args, **kwargs)

      def on_status(self, status):
          """Process one incoming status, then report whether to keep going."""
          self.process_status(status)
          return self.should_continue()

      def on_error(self, error):
          """Log the error reported by the stream and report whether to go on."""
          logger.warning("Twitter stream error: %s", error)
          return self.should_continue()

      def on_timeout(self):
          """Report whether to keep streaming after a timeout."""
          return self.should_continue()

      def should_continue(self):
          """Return False once the hashtags in the database no longer match.

          The database is queried at most once per ``RECHECK_INTERVAL``
          seconds; between checks this always returns True.
          """
          now = time.time()
          if now - self.checker <= self.RECHECK_INTERVAL:
              return True

          self.checker = now
          current_names = set(
              tag.name for tag in Hashtag.objects.filter(profiles__isnull=False)
          )
          return current_names == self._tracked_names

      def is_good(self, user):
          """Return True when ``user`` passes every account-quality check."""
          if user.default_profile_image:
              return False
          if not user.description or 'bot' in user.description:
              return False
          if user.followers_count < 50:
              return False
          if user.friends_count > 250:
              return False
          # followers_count is at least 50 here, so the division is safe.
          if int(user.friends_count) / float(user.followers_count) > 3:
              return False
          if not user.name:
              return False
          return True

      @staticmethod
      def _native(text):
          """Return ``text`` as the interpreter's native ``str`` type.

          On Python 2 unicode text is UTF-8 encoded (what the original code
          did); on Python 3 text is returned unchanged and bytes are decoded,
          which avoids storing a ``"b'...'"`` representation.
          """
          if isinstance(text, str):
              return text
          if isinstance(text, bytes):
              return text.decode('utf-8')
          return text.encode('utf-8')

      def _push(self, hashtag, suffix, value):
          """Push ``value`` onto the Redis list ``"<hashtag>-<suffix>"``."""
          if self.redis_cache is None:
              # No cache was supplied to the constructor: nothing to store.
              return None
          key = "{}-{}".format(hashtag, suffix)
          return self.redis_cache.lpush(key, "{}".format(value))

      def cache_users_id(self, twitter_id, hashtag):
          """Push a user id onto the ``"<hashtag>-user_ids"`` list."""
          return self._push(hashtag, "user_ids", twitter_id)

      def cache_users_screen_name(self, screen_name, hashtag):
          """Push a screen name onto ``"<hashtag>-user_screen_names"``."""
          return self._push(hashtag, "user_screen_names", screen_name)

      def cache_tweet_id(self, twitter_id, hashtag):
          """Push a tweet id onto the ``"<hashtag>-tweet_id"`` list."""
          return self._push(hashtag, "tweet_id", twitter_id)

      def cache_tweet_text(self, text, hashtag):
          """Push a tweet text onto the ``"<hashtag>-tweet_text"`` list."""
          return self._push(hashtag, "tweet_text", text)

      def process_status(self, status):
          """Cache the author and text of ``status`` under each of its hashtags.

          Returns True without caching anything when the author fails
          ``is_good``; otherwise returns None after caching.
          """
          if not self.is_good(status.user):
              return True

          twitter_id = int(status.user.id)
          screen_name = self._native(status.user.screen_name)
          tweet_text = self._native(status.text)
          # NOTE(review): the original computed the status id but never called
          # cache_tweet_id(); that behaviour is kept -- confirm whether tweet
          # ids were meant to be cached as well.
          for entity in status.entities.get('hashtags', []):
              hashtag = self._native(entity['text'].lstrip('#').lower())
              self.cache_users_id(hashtag=hashtag, twitter_id=twitter_id)
              self.cache_users_screen_name(hashtag=hashtag, screen_name=screen_name)
              self.cache_tweet_text(text=tweet_text, hashtag=hashtag)
  84.  
  85.  
  86.  
  87.  
  class Worker(object):
      """Keep a Twitter stream open for the hashtags stored in the database.

      Credentials are no longer embedded in the source.  Each one is taken
      from the matching constructor argument or, when that is omitted, from
      the environment variables named in ``_CREDENTIAL_ENV``.
      ``Worker(con)`` therefore keeps working once those variables are set.

      NOTE(review): the keys that used to be hard-coded here were published
      with this file and should be revoked and regenerated.
      """

      # Seconds to wait when there is nothing to track or after a failure.
      RETRY_DELAY = 10

      # Environment variables consulted, in constructor-argument order.
      _CREDENTIAL_ENV = (
          'TWITTER_API_KEY',
          'TWITTER_API_SECRET',
          'TWITTER_ACCESS_TOKEN',
          'TWITTER_ACCESS_SECRET',
      )

      def __init__(self, con, apikey=None, apisecret=None,
                   access_token=None, access_secret=None):
          """Store the cache connection and build the OAuth handler.

          con           -- cache connection handed to every ``Streamer``.
          apikey, apisecret, access_token, access_secret
                        -- optional Twitter credentials; each falls back to
                           its environment variable when not given.

          Raises ValueError when a credential cannot be resolved.
          """
          # Resolve (and validate) credentials before touching tweepy.
          apikey, apisecret, access_token, access_secret = (
              self._resolve_credentials(
                  apikey, apisecret, access_token, access_secret
              )
          )
          self.con = con
          self.auth = tweepy.OAuthHandler(apikey, apisecret)
          self.auth.set_access_token(access_token, access_secret)

      @classmethod
      def _resolve_credentials(cls, apikey, apisecret, access_token,
                               access_secret):
          """Return the four credentials, filling gaps from the environment."""
          explicit = (apikey, apisecret, access_token, access_secret)
          resolved = tuple(
              given or os.environ.get(env_name)
              for given, env_name in zip(explicit, cls._CREDENTIAL_ENV)
          )
          missing = [
              env_name
              for env_name, value in zip(cls._CREDENTIAL_ENV, resolved)
              if not value
          ]
          if missing:
              raise ValueError(
                  "Missing Twitter credentials; pass them to Worker() or set: "
                  "{}".format(', '.join(missing))
              )
          return resolved

      def streamFromTwitter(self):
          """Stream forever, rebuilding the stream whenever it stops.

          Each pass reloads the hashtags that have at least one profile.
          With none to track the loop sleeps ``RETRY_DELAY`` seconds;
          otherwise it blocks in ``stream.filter`` until the stream ends.
          """
          while True:
              # list() snapshots the result so the listener and the track
              # list are built from the same rows.
              hashtags = list(
                  Hashtag.objects.filter(profiles__isnull=False).distinct()
              )
              if not hashtags:
                  time.sleep(self.RETRY_DELAY)
                  continue

              listener = Streamer(hashtags=hashtags, redis_cache=self.con)
              stream = tweepy.Stream(self.auth, listener)
              try:
                  stream.filter(
                      track=['#' + tag.name for tag in hashtags],
                      languages=['en'],
                  )
              except Exception:
                  # Top-level boundary of a long-running worker: log the
                  # failure and retry after a pause instead of dying.
                  # KeyboardInterrupt/SystemExit still propagate.
                  logger.exception("Twitter stream failed; reconnecting")
                  time.sleep(self.RETRY_DELAY)
  107.  
  108.  
  109.  
  110.  
  111.  
  112.  
  if __name__ == '__main__':
      # Command line: a single required option naming the Django project
      # directory (the one that holds manage.py).
      parser = OptionParser("usage: %prog -s PATH | --path=PATH")
      parser.add_option(
          '-s', '--path',
          dest='path',
          metavar='PATH',
          help="The path to the Django environment",
      )
      options, args = parser.parse_args()
      if not options.path:
          parser.error("Specify the path where manage.py is")

      # Select the settings module and make the project importable before
      # anything from Django or the project is imported.
      os.environ['DJANGO_SETTINGS_MODULE'] = "flockwithme.prod"
      sys.path.append(options.path)

      from django.core.wsgi import get_wsgi_application
      application = get_wsgi_application()

      # These imports stay at module level (not inside a function) on
      # purpose: the classes above look up ``Hashtag`` as a module global.
      from django_redis import get_redis_connection
      from django.core.cache import cache
      from flockwithme.app.scheduler.models import TwitterStatus, TwitterUser, Hashtag

      con = get_redis_connection('default')
      Worker(con).streamFromTwitter()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement