SHARE
TWEET

Untitled

a guest Jul 22nd, 2019 71 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import json
  2.  
  3. import shutil
  4. import os
  5. from time import time
  6. import pandas as pd
  7. import numpy as np
  8.  
  9. from multiprocessing.dummy import Pool as ThreadPool
  10. from tqdm import tqdm
  11.  
  12. json_data = 'new_dataset.json'
  13.  
  14. with open(json_data, 'r') as f:
  15.     car_dict = json.load(f)
  16.    
  17. car_dict['automobile_type1'] = car_dict.pop('Автомобильные (тип 1)')  
  18. car_type_keys = list(car_dict.keys())
  19.  
  20. def download_image(url, car_type, num_plate):
  21.     # r = get(url, stream=True, headers={'User-agent': 'Mozilla/5.0'})
  22.     # if r.status_code == 200:
  23.     path = 'images/{}'.format(car_type)
  24.     name = str(num_plate).replace(' ', '').upper()
  25.     command = 'wget ' + url + ' -O' + f'{path}/{name}.jpg'
  26.     os.system(command)
  27.     return (f'{path}/{name}', car_type, num_plate)
  28.  
  29. pool = ThreadPool(100)
  30. results = []
  31. for car_type in car_type_keys:
  32.     dataset = pd.DataFrame(car_dict[car_type])
  33.     dataset['car_photos'] = dataset['car_photos'].apply(lambda x: x[0].replace('/medium/', '/original/').replace('/m/', '/o/'))
  34.     dataset.drop_duplicates(subset=['car_photos'], inplace=True)
  35.     links = dataset['car_photos'].values
  36.     num_plates = dataset['number'].values
  37.     car_type = car_type if len(car_type.split()) == 0 else '_'.join(car_type.split())
  38.     if car_type not in os.listdir('images'):
  39.         os.mkdir('images/{}'.format(car_type))
  40.     else:
  41.         shutil.rmtree('images/{}'.format(car_type))
  42.         os.mkdir('images/{}'.format(car_type))
  43.        
  44.     results.append(pool.starmap(download_image, zip(links, [car_type for i in range(len(links))], num_plates)))
  45.     #for link, car_type, num_plate in tqdm(zip(links, [car_type for i in range(len(links))], num_plates)):
  46.     #    results.append(download_image(link, car_type, num_plate))
  47.  
  48. pd.DataFrame(np.vstack(results), columns=['path', 'class_name', 'num_plate']).to_csv('number_plate_images.csv', index=False)
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top