Advertisement
Guest User

Untitled

a guest
Jul 18th, 2019
136
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.26 KB | None | 0 0
import csv
import datetime
import io
import json
import logging
import os
import urllib.request as request
from multiprocessing.dummy import Pool as ThreadPool

import requests
from PIL import Image
  12.  
  13. root_pictures_folder = r'/Users/a.eryomin/Downloads/2217'
  14. path_to_folder_with_csv = r'/Users/a.eryomin/Downloads/'
  15.  
  16.  
  17. logging.basicConfig(filename=os.path.join(path_to_folder_with_csv, '920.log'), filemode='a',
  18. format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
  19. datefmt='%H:%M:%S', level=logging.INFO)
  20.  
  21.  
  22. def save_file(url_info, categ, subcateg):
  23. """
  24. :param url_info:
  25. :param categ:
  26. :param subcateg:
  27. :return:
  28. """
  29. try:
  30. image = Image.open(request.urlopen(url_info['url']))
  31.  
  32. path_to_save_folder = os.path.join(root_pictures_folder, categ, subcateg)
  33. if not os.path.exists(path_to_save_folder):
  34. os.makedirs(path_to_save_folder)
  35.  
  36. img_file_name = os.path.join(path_to_save_folder, url_info['id']+'.jpg')
  37. image.save(os.path.join(path_to_save_folder, img_file_name), 'JPEG', quality=60)
  38. except Exception as err:
  39. print(err)
  40. logging.error(f"Can't load {url_info['url']}")
  41.  
  42.  
  43. def downloder(line):
  44. try:
  45. categ, subcateg, product_id = line.split(',')
  46. api_youla_url = f'http://api.youla.io/api/v1/product/{product_id}'
  47. data = requests.get(api_youla_url).json()
  48. for pict_url in data['data']['images']:
  49. save_file(pict_url, categ, subcateg)
  50.  
  51. except Exception as err:
  52. pass
  53.  
  54.  
  55. def downloaderParllel(list_of_lines, threads=10):
  56. pool = ThreadPool(threads)
  57. pool.map(downloder, list_of_lines)
  58. pool.close()
  59. pool.join()
  60.  
  61.  
  62. if __name__ == '__main__':
  63.  
  64. path_to_csv = os.path.join(path_to_folder_with_csv, '2217.txt')
  65. time_start = datetime.datetime.now()
  66.  
  67. with open(path_to_csv, 'r') as f:
  68. lines = f.readlines()
  69. stripped_lines = [line.rstrip('\n') for line in lines]
  70.  
  71. downloaderParllel(stripped_lines, 20)
  72.  
  73. # with open('saved_picts.txt', 'a') as f:
  74. # f.writelines([i for i in parrallel_loader if i is not None])
  75.  
  76. total_time = datetime.datetime.now() - time_start
  77. logging.info(f"Total working time {total_time}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement