Advertisement
daniilak

Untitled

Dec 15th, 2021
150
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.46 KB | None | 0 0
  1. from time import sleep
  2. from json import dumps
  3. from requests import get
  4. import os
  5. from PIL import Image
  6. SEARCH_HEAD={
  7.     'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
  8.     'accept-encoding': 'gzip, deflate, br',
  9.     'accept-language': 'ru,en;q=0.9',
  10.     'cache-control': 'no-cache',
  11.     'dnt': '1',
  12.     'pragma': 'no-cache',
  13.     'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="96", "Google Chrome";v="96"',
  14.     'sec-ch-ua-mobile': '?0',
  15.     'sec-ch-ua-platform': '"Windows"',
  16.     'sec-fetch-dest': 'document',
  17.     'sec-fetch-mode': 'navigate',
  18.     'sec-fetch-site': 'none',
  19.     'sec-fetch-user': '?1',
  20.     'upgrade-insecure-requests': '1',
  21.     'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36'
  22. }
  23. import requests
  24. from requests.adapters import HTTPAdapter
  25. from requests.packages.urllib3.util.retry import Retry
  26.  
  27. def func(index, photo, id_advert):
  28.    
  29.     extent=os.path.splitext(os.path.basename(photo))
  30.     if len(extent[1]) == 0:
  31.         extent = '.webp'
  32.     else:
  33.         extent = extent[1]
  34.     session = requests.Session()
  35.     retry = Retry(connect=3, backoff_factor=0.5)
  36.     adapter = HTTPAdapter(max_retries=retry)
  37.     session.mount('http://', adapter)
  38.     session.mount('https://', adapter)
  39.  
  40.     try:
  41.         photoPage = session.get(photo,  headers=SEARCH_HEAD)
  42.         # photoPage = get(photo)
  43.     except Exception as e:
  44.         print("ERROR ID_ADVERT", str(e), id_advert, 'photo', photo)
  45.         return {'url':photo,'err': 'ERR'}
  46.    
  47.     if photoPage.status_code != 200:
  48.         return {'url':photo,'err': str(photoPage.status_code)}
  49.    
  50.     if photoPage.headers['Content-Type'] not in ("image/png", "image/jpeg", "image/jpg", 'image/webp'):
  51.         return {'url':photo,'err': 'content_type'}
  52.    
  53.     name_file = "photos/"+id_advert+'/'+str(index)+str(extent)
  54.     try:
  55.         with open(name_file, 'wb+') as f:
  56.             f.write(photoPage.content)
  57.         f.close()
  58.     except Exception as e:
  59.         print("Ошибка скачивания 1", str(e), id_advert, photo, '\n_____')  
  60.         return {'url':photo,'err': 'down'}
  61.    
  62.     if '.webp' in extent:
  63.         im = Image.open(name_file).convert("RGB")
  64.         name_file_new = name_file.replace('.webp','.jpg')
  65.         im.save(name_file_new, "jpeg")
  66.         os.remove(name_file)
  67.         extent = '.jpg'
  68.     # send_files_to_postgresql(connection, cursor, "/home/daniilak/photos/"+id_advert+'/'+str(index)+str(extent), extent, id_advert, photo)
  69.     return {
  70.         'index': index,
  71.         'extent':extent,
  72.         'url':photo,
  73.         'err': '',
  74.     }
  75. from joblib import Parallel, delayed
  76. import json
  77. import pandas
  78. wine_data = pandas.read_csv('photos.csv', sep = ';', usecols= ['id','photos'])
  79. wine_data = wine_data.iloc[11000:]
  80. a = wine_data.set_index('id').T.to_dict('list')
  81. del wine_data
  82. def myfun(id_advert):
  83.     print(id_advert)
  84.     photos = json.loads(a[id_advert][0])
  85.     os.makedirs("photos/"+str(id_advert), exist_ok=True)
  86.     index = 1
  87.     answer = []
  88.     for photo in photos:
  89.         # print(index, str(id_advert), photo)
  90.         answer.append(func(index, photo, str(id_advert)))
  91.         index = index + 1
  92.     f = open("photos/"+str(id_advert)+'/info.json', 'w+')
  93.     f.write(dumps(answer))
  94.     f.close()
  95.  
  96. results = Parallel(n_jobs=12)(delayed(myfun)(id_advert) for id_advert in a)
  97.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement