Advertisement
Guest User

instagram-dl

a guest
Sep 2nd, 2016
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.07 KB | None | 0 0
  1. import concurrent.futures
  2. import errno
  3. import json
  4. import os
  5. import re
  6. import requests
  7. import tqdm
  8. import sys
  9. import warnings
  10.  
# Install a process-wide "ignore" filter with no category or module
# restriction, so every warning raised after this point is suppressed.
# NOTE(review): presumably meant to keep the progress output clean while
# downloading -- confirm; the blanket filter also hides deprecation notices.
warnings.filterwarnings("ignore")
  12.  
  13. class InstagramScraper:
  14.  
  15.     def __init__(self, username):
  16.         self.username = username
  17.         self.numPosts = 0
  18.         self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=10)
  19.         self.future_to_item = {}
  20.  
  21.     def crawl(self, max_id=None):
  22.         """Crawls through the user's media"""
  23.         media = self.get_media(max_id)
  24.  
  25.         self.numPosts += len(media['items'])
  26.         sys.stdout.write('\rFound %i post(s)' % self.numPosts)
  27.         sys.stdout.flush()
  28.  
  29.         for item in media['items']:
  30.             future = self.executor.submit(self.download, item, './' + self.username)
  31.             self.future_to_item[future] = item
  32.  
  33.         if 'more_available' in media and media['more_available'] is True:
  34.             max_id = media['items'][-1]['id']
  35.             self.crawl(max_id)
  36.  
  37.     def get_media(self, max_id):
  38.         """Gets the user's media metadata"""
  39.         url = 'https://instagram.com/' + self.username + '/media'
  40.  
  41.         if max_id is not None:
  42.             url += '?&max_id=' + max_id
  43.         resp = requests.get(url)
  44.  
  45.         if resp.status_code == 200:
  46.             media = json.loads(resp.text)
  47.  
  48.             if not media['items']:
  49.                 raise ValueError('User %s is private' % self.username)
  50.  
  51.             return media
  52.         else:
  53.             raise ValueError('User %s does not exist' % self.username)
  54.  
  55.     def download(self, item, save_dir='./'):
  56.         """Downloads the media file"""
  57.         try:
  58.             os.makedirs(save_dir)
  59.         except OSError as e:
  60.             if e.errno == errno.EEXIST and os.path.isdir(save_dir):
  61.                 # another thread beat us to creating this dir
  62.                 pass
  63.             else:
  64.                 # target dir exists as a file, or a different error
  65.                 raise
  66.  
  67.         item['url'] = item[item['type'] + 's']['standard_resolution']['url'].split('?')[0]
  68.         if item['url'].find('.mp4') < 0 :
  69.             item['url'] = item[item['type'] + 's']['thumbnail']['url'].split('?')[0]
  70.             item['url'] = re.sub(r'/c\d+\.\d+\.\d+\.\d+/', '/', item['url'])
  71.        
  72.         # remove dimensions to get largest image
  73.         item['url'] = re.sub(r'/s\d+x\d+/', '/', item['url'])
  74.  
  75.         base_name = item['url'].split('/')[-1]
  76.         file_path = os.path.join(save_dir, base_name)
  77.  
  78.         if not os.path.isfile(file_path):
  79.  
  80.             with open(file_path, 'wb') as file:
  81.                 bytes = requests.get(item['url']).content
  82.                 file.write(bytes)
  83.  
  84.             file_time = int(item['created_time'])
  85.             os.utime(file_path, (file_time, file_time))
  86.  
  87. if __name__ == '__main__':
  88.     username = sys.argv[1]
  89.  
  90.     scraper = InstagramScraper(username)
  91.     scraper.crawl()
  92.  
  93.     for future in tqdm.tqdm(concurrent.futures.as_completed(scraper.future_to_item), total=len(scraper.future_to_item), desc='Downloading'):
  94.         item = scraper.future_to_item[future]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement