Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tweepy
- import random
- from collections import defaultdict
class MyStreamListener(tweepy.StreamListener):
    """Stream listener that keeps a fixed-size random sample of tagged tweets.

    Tweets without hashtags are ignored.  The first ``limit`` tagged tweets
    are stored unconditionally; after that, reservoir sampling is used: the
    n-th tagged tweet replaces a uniformly chosen stored tweet with
    probability ``limit / n``.  Whenever the sample changes, the hashtag
    counts over the current sample are updated and the most frequent
    hashtags are printed.

    Attributes:
        counter: number of tagged tweets seen since the stream started.
        limit: maximum number of tweets kept in the sample.
        tweets: the current sample of tweet objects.
        tags_counts: hashtag text -> occurrences within the current sample.
    """

    def __init__(self):
        super(MyStreamListener, self).__init__()
        self.counter = 0
        self.limit = 150
        self.tweets = list()
        self.tags_counts = defaultdict(int)

    def on_status(self, tweet):
        """Process one incoming tweet and update the sample if it is kept.

        Args:
            tweet: status object whose ``entities['hashtags']`` is a list of
                dicts carrying the hashtag under the ``'text'`` key.
        """
        if not tweet.entities['hashtags']:
            return

        self.counter += 1

        if self.counter <= self.limit:
            # Still filling the reservoir: keep every tagged tweet.
            self.tweets.append(tweet)
        elif random.random() < self.limit / float(self.counter):
            # Reservoir sampling: keep the n-th item with probability
            # limit / n (float division so this also holds on Python 2),
            # evicting a uniformly chosen member of the sample.
            slot = random.randint(0, self.limit - 1)
            self._forget_tags(self.tweets[slot])
            self.tweets[slot] = tweet
        else:
            # Tweet rejected: the sample and the counts are unchanged.
            return

        self._count_tags(tweet)
        self._print_top_tags()

    def _count_tags(self, tweet):
        """Add the hashtags of *tweet* to the running counts."""
        for entry in tweet.entities['hashtags']:
            self.tags_counts[entry['text']] += 1

    def _forget_tags(self, tweet):
        """Subtract the hashtags of an evicted *tweet* from the counts."""
        for entry in tweet.entities['hashtags']:
            tag = entry['text']
            self.tags_counts[tag] -= 1
            if self.tags_counts[tag] == 0:
                # Drop exhausted tags so they never show up in the ranking.
                del self.tags_counts[tag]

    def _print_top_tags(self):
        """Print the tagged-tweet count and the top five hashtags (with ties)."""
        print("The number of tweets with tags from the beginning: " + str(self.counter))
        # Highest count first; alphabetical order breaks ties.
        ranked = sorted(self.tags_counts.items(), key=lambda kv: (-kv[1], kv[0]))
        top = ranked[:5]
        # Also show every tag tied with the fifth place.
        for tag, count in ranked[5:]:
            if count != top[-1][1]:
                break
            top.append((tag, count))
        for tag, count in top:
            print("%s : %s" % (tag, count))
        print("")

    def on_error(self, status_code):
        """Handle a stream error reported with *status_code*.

        Returns ``False`` for status 420 so the stream disconnects; any
        other status code falls through and returns ``None``.
        """
        if status_code == 420:
            # returning False in on_error disconnects the stream
            return False
if __name__ == '__main__':
    # Local import: only the script entry point needs the environment.
    import os

    # Credentials come from the environment instead of being hard-coded in
    # the source, so they are not leaked when the file is shared.
    # NOTE(review): the keys previously embedded here were published and
    # should be revoked and regenerated.
    credential_vars = (
        'TWITTER_API_KEY',
        'TWITTER_API_KEY_SECRET',
        'TWITTER_ACCESS_TOKEN',
        'TWITTER_ACCESS_TOKEN_SECRET',
    )
    missing = [name for name in credential_vars if not os.environ.get(name)]
    if missing:
        raise SystemExit(
            "Missing required environment variable(s): " + ", ".join(missing)
        )
    api_key, api_key_secret, access_token, access_token_secret = [
        os.environ[name] for name in credential_vars
    ]

    auth = tweepy.OAuthHandler(api_key, api_key_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)

    myStreamListener = MyStreamListener()
    myStream = tweepy.Stream(auth=api.auth, listener=myStreamListener)
    # Track the bare '#' term, as the original script did.
    myStream.filter(track=['#'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement