Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import requests
- import os
- import csv
- import json
- import datetime
- from PIL import Image
- from multiprocessing.dummy import Pool as ThreadPool
- import urllib.request as request
- import logging
# Root folder under which downloaded pictures are stored.
root_pictures_folder = r'/Users/a.eryomin/Downloads/2217'
# Folder holding the input list of products; the log file is written here too.
path_to_folder_with_csv = r'/Users/a.eryomin/Downloads/'

# Append INFO-and-above records to 920.log next to the input file.
logging.basicConfig(
    level=logging.INFO,
    filename=os.path.join(path_to_folder_with_csv, '920.log'),
    filemode='a',
    datefmt='%H:%M:%S',
    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
)
def save_file(url_info, categ, subcateg):
    """
    Download one picture and store it as a JPEG (quality 60).

    The file is written to ``root_pictures_folder/categ/subcateg/<id>.jpg``.
    Any failure is printed and logged; nothing is raised to the caller.

    :param url_info: mapping with a ``'url'`` key (image address) and an
        ``'id'`` key (string used as the file name)
    :param categ: name of the category sub-folder
    :param subcateg: name of the sub-category sub-folder
    :return: None
    """
    url = None  # kept outside the try so the error handler can always report it
    try:
        url = url_info['url']
        # Timeout so a stalled connection cannot block a worker thread forever.
        with request.urlopen(url, timeout=30) as response:
            image = Image.open(response)
            path_to_save_folder = os.path.join(root_pictures_folder, categ, subcateg)
            # exist_ok avoids the check-then-create race between worker threads.
            os.makedirs(path_to_save_folder, exist_ok=True)
            img_file_name = os.path.join(path_to_save_folder, url_info['id'] + '.jpg')
            # JPEG cannot store alpha/palette modes (e.g. RGBA, P); convert
            # those to RGB instead of failing. Alpha information is dropped.
            if image.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
                image = image.convert('RGB')
            image.save(img_file_name, 'JPEG', quality=60)
    except Exception as err:
        # Best effort: one bad picture must not stop the rest of the batch.
        print(err)
        logging.error("Can't load %s: %s", url, err)
def downloder(line):
    """
    Download every picture of the product described by one input line.

    The product is looked up at ``http://api.youla.io/api/v1/product/<id>``
    and each entry of ``data['data']['images']`` in the JSON reply is passed
    to ``save_file``. Failures are logged, never raised, so one bad line
    does not abort the whole batch.

    :param line: text of the form ``category,subcategory,product_id``
    :return: None
    """
    if not line or not line.strip():
        # Nothing to do for blank lines.
        return
    try:
        categ, subcateg, product_id = line.split(',')
        # Stray whitespace or '\r' from the input file would corrupt the URL.
        product_id = product_id.strip()
        api_youla_url = f'http://api.youla.io/api/v1/product/{product_id}'
        # Timeout so a stalled request cannot block a worker thread forever.
        response = requests.get(api_youla_url, timeout=30)
        response.raise_for_status()
        data = response.json()
        for pict_url in data['data']['images']:
            save_file(pict_url, categ, subcateg)
    except Exception as err:
        # Deliberately broad: record the failure instead of silently dropping it.
        logging.error("Can't process line %r: %s", line, err)
def downloaderParllel(list_of_lines, threads=10):
    """
    Run ``downloder`` over all lines using a pool of worker threads.

    Blocks until every line has been processed.

    :param list_of_lines: input lines, one product per line
    :param threads: number of worker threads in the pool
    :return: None
    """
    if not list_of_lines:
        # No work: do not spin up a thread pool at all.
        return
    pool = ThreadPool(threads)
    try:
        pool.map(downloder, list_of_lines)
    finally:
        # Always release the worker threads, even if map() raised.
        pool.close()
        pool.join()
if __name__ == '__main__':
    # Entry point: read the product list, download all pictures with
    # 20 worker threads and log how long the whole run took.
    path_to_csv = os.path.join(path_to_folder_with_csv, '2217.txt')
    time_start = datetime.datetime.now()
    with open(path_to_csv, 'r') as f:
        # Drop line endings (including '\r' from CRLF files) and skip blank
        # lines so they are not handed to the workers as products.
        stripped_lines = [line.rstrip('\r\n') for line in f if line.strip()]
    downloaderParllel(stripped_lines, 20)
    total_time = datetime.datetime.now() - time_start
    logging.info(f"Total working time {total_time}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement