SHARE
TWEET

Untitled

a guest Jul 18th, 2019 59 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import requests
  2. import os
  3. import csv
  4. import json
  5. import datetime
  6. from PIL import Image
  7. from multiprocessing.dummy import Pool as ThreadPool
  8.  
  9.  
  10. import urllib.request as request
  11. import logging
  12.  
  13. root_pictures_folder = r'/Users/a.eryomin/Downloads/2217'
  14. path_to_folder_with_csv = r'/Users/a.eryomin/Downloads/'
  15.  
  16.  
  17. logging.basicConfig(filename=os.path.join(path_to_folder_with_csv, '920.log'), filemode='a',
  18.                     format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
  19.                     datefmt='%H:%M:%S', level=logging.INFO)
  20.  
  21.  
  22. def save_file(url_info, categ, subcateg):
  23.     """
  24.     :param url_info:
  25.     :param categ:
  26.     :param subcateg:
  27.     :return:
  28.     """
  29.     try:
  30.         image = Image.open(request.urlopen(url_info['url']))
  31.  
  32.         path_to_save_folder = os.path.join(root_pictures_folder, categ, subcateg)
  33.         if not os.path.exists(path_to_save_folder):
  34.             os.makedirs(path_to_save_folder)
  35.  
  36.         img_file_name = os.path.join(path_to_save_folder, url_info['id']+'.jpg')
  37.         image.save(os.path.join(path_to_save_folder, img_file_name), 'JPEG', quality=60)
  38.     except Exception as err:
  39.         print(err)
  40.         logging.error(f"Can't load {url_info['url']}")
  41.  
  42.  
  43. def downloder(line):
  44.     try:
  45.         categ, subcateg, product_id = line.split(',')
  46.         api_youla_url = f'http://api.youla.io/api/v1/product/{product_id}'
  47.         data = requests.get(api_youla_url).json()
  48.         for pict_url in data['data']['images']:
  49.             save_file(pict_url, categ, subcateg)
  50.  
  51.     except Exception as err:
  52.         pass
  53.  
  54.  
  55. def downloaderParllel(list_of_lines, threads=10):
  56.     pool = ThreadPool(threads)
  57.     pool.map(downloder, list_of_lines)
  58.     pool.close()
  59.     pool.join()
  60.  
  61.  
  62. if __name__ == '__main__':
  63.  
  64.     path_to_csv = os.path.join(path_to_folder_with_csv, '2217.txt')
  65.     time_start = datetime.datetime.now()
  66.  
  67.     with open(path_to_csv, 'r') as f:
  68.         lines = f.readlines()
  69.     stripped_lines = [line.rstrip('\n') for line in lines]
  70.  
  71.     downloaderParllel(stripped_lines, 20)
  72.  
  73. # with open('saved_picts.txt', 'a') as f:
  74. #     f.writelines([i for i in parrallel_loader if i is not None])
  75.  
  76.     total_time = datetime.datetime.now() - time_start
  77.     logging.info(f"Total working time {total_time}")
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top