networkcat

Untitled

Mar 20th, 2019
28
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # Bulk Scrape Twitter Users
  2. # multithreaded - set your thread count
  3. # March 2019
  4.  
  5. import os
  6. import shutil
  7. from multiprocessing.dummy import Pool as ThreadPool
  8.  
  9. import requests
  10. from fake_useragent import UserAgent
  11. from twitter_scraper import get_tweets
  12.  
# Twitter screen names whose timelines will be scraped.
TUSERS = ["banbanr233", "bambi_cos", "Kasyou3roshieru", "kawasaki__aya"]
# Number of worker threads; one user is scraped per thread.
THREADCOUNT = 4
# Upper bound on timeline pages requested per user.
MAXPAGES = 9999
# Shared fake-useragent generator; `ua.chrome` yields a random Chrome UA string.
ua = UserAgent()
  17.  
  18.  
  19. def dl_randua(fu, fn, uagent):
  20.     r = requests.get(fu, stream=True, headers={"User-agent": uagent})
  21.     with open(fn, "wb") as f:
  22.         r.raw.decode_content = True
  23.         shutil.copyfileobj(r.raw, f)
  24.  
  25.  
  26. def dl_list(X, directory):
  27.     for i in X:
  28.         if isinstance(i, str):
  29.             filename = os.path.join(directory, i.split("/")[-1])
  30.             uagent = ua.chrome
  31.             try:
  32.                 dl_randua(i, filename, uagent)
  33.             except:
  34.                 print("error downloading:", i)
  35.  
  36.  
  37. def dl_user(tuser):
  38.     counter = 0
  39.     directory = os.path.join(".", tuser)
  40.     try:
  41.         os.stat(directory)
  42.     except:
  43.         os.mkdir(directory)
  44.     # Because get_tweets will throw a bitch fit at the end
  45.     try:
  46.         for t in get_tweets(tuser, pages=MAXPAGES):
  47.             pics = t["entries"]["photos"]
  48.             vids = t["entries"]["videos"]
  49.             dl_list(pics + vids, directory)
  50.             counter += 1
  51.     except:
  52.         print(counter, "tweets downloaded for", tuser)
  53.  
  54.  
  55. def main():
  56.     pool = ThreadPool(THREADCOUNT)
  57.     pool.map(dl_user, TUSERS)
  58.  
  59.  
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
RAW Paste Data