Advertisement
Guest User

Untitled

a guest
Jan 23rd, 2017
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.06 KB | None | 0 0
  1. #This code searches for tweets with a particular keyword and writes certain fields into a CSV file
  2.  
  3. import sys, csv, twitter
  4. import os
  5. import tweepy
  6.  
  7. # Replace the API_KEY and API_SECRET with your application's key and secret.
  8. #This code is using AppAuthHandler, not OAuthHandler to get higher limits, 2.5 times.
  9. auth = tweepy.AppAuthHandler('API_KEY_HERE', 'API_SECRET_HERE')
  10. api = tweepy.API(auth, wait_on_rate_limit=True,
  11. wait_on_rate_limit_notify=True)
  12.  
  13.  
  14. if (not api):
  15. print ("Can't Authenticate")
  16. sys.exit(-1)
  17. def clean(val):
  18. clean = ""
  19. if val:
  20. clean = val.encode('utf-8')
  21. return clean
  22.  
  23. searchQuery = '#BandungIntoleran' #Put your hasthag here
  24. maxTweets = 10000000000 # Some arbitrary large number
  25. tweetsPerQry = 100 # this is the max the API permits
  26. fName = 'bandung_intoleran.csv' #CSV file to store the data
  27. csvfile = open(fName, 'w');
  28. csvwriter = csv.writer(csvfile)
  29.  
  30. count=0
  31.  
  32. # If results from a specific ID onwards are reqd, set since_id to that ID.
  33. # else default to no lower limit, go as far back as API allows
  34. sinceId = None
  35.  
  36. # If results only below a specific ID are, set max_id to that ID.
  37. # else default to no upper limit, start from the most recent tweet matching the search query.
  38. max_id = -1L
  39. tweetCount = 0
  40.  
  41. #print("Downloading max {0} tweets".format(maxTweets))
  42. with open(fName, 'w') as csvfile:
  43. while tweetCount < maxTweets:
  44. try:
  45. if (max_id <= 0):
  46. if (not sinceId):
  47. new_tweets = api.search(q=searchQuery, count=tweetsPerQry)
  48. else:
  49. new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
  50. since_id=sinceId)
  51. else:
  52. if (not sinceId):
  53. new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
  54. max_id=str(max_id - 1))
  55. else:
  56. new_tweets = api.search(q=searchQuery, count=tweetsPerQry,
  57. max_id=str(max_id - 1),
  58. since_id=sinceId)
  59.  
  60. if not new_tweets:
  61. print("No more tweets found")
  62. break
  63. for tweet in new_tweets:
  64. #print tweet with details, you can add other data by putting a proper reference, look at Twitter developers website;
  65. csvwriter.writerow([tweet.created_at,
  66. clean(tweet.user.screen_name),
  67. clean(tweet.text),
  68. tweet.user.created_at,
  69. tweet.user.followers_count,
  70. tweet.user.friends_count,
  71. tweet.user.statuses_count,
  72. clean(tweet.source),
  73. clean(tweet.user.location),
  74. tweet.user.geo_enabled,
  75. tweet.user.lang,
  76. clean(tweet.user.time_zone),
  77. tweet.retweet_count
  78. ]);
  79.  
  80. tweetCount += len(new_tweets)
  81. #print("Downloaded {0} tweets".format(tweetCount))
  82. max_id = new_tweets[-1].id
  83. except Exception as e:
  84. # Just exit if any error
  85. print("some error : " + str(e))
  86. pass
  87.  
  88. print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement