Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2019
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.87 KB | None | 0 0
  1. import json
  2.  
  3. import shutil
  4. import os
  5. from time import time
  6. import pandas as pd
  7. import numpy as np
  8.  
  9. from multiprocessing.dummy import Pool as ThreadPool
  10. from tqdm import tqdm
  11.  
  12. json_data = 'new_dataset.json'
  13.  
  14. with open(json_data, 'r') as f:
  15.     car_dict = json.load(f)
  16.    
  17. car_dict['automobile_type1'] = car_dict.pop('Автомобильные (тип 1)')  
  18. car_type_keys = list(car_dict.keys())
  19.  
  20. def download_image(url, car_type, num_plate):
  21.     # r = get(url, stream=True, headers={'User-agent': 'Mozilla/5.0'})
  22.     # if r.status_code == 200:
  23.     path = 'images/{}'.format(car_type)
  24.     name = str(num_plate).replace(' ', '').upper()
  25.     command = 'wget ' + url + ' -O' + f'{path}/{name}.jpg'
  26.     os.system(command)
  27.     return (f'{path}/{name}', car_type, num_plate)
  28.  
  29. pool = ThreadPool(100)
  30. results = []
  31. for car_type in car_type_keys:
  32.     dataset = pd.DataFrame(car_dict[car_type])
  33.     dataset['car_photos'] = dataset['car_photos'].apply(lambda x: x[0].replace('/medium/', '/original/').replace('/m/', '/o/'))
  34.     dataset.drop_duplicates(subset=['car_photos'], inplace=True)
  35.     links = dataset['car_photos'].values
  36.     num_plates = dataset['number'].values
  37.     car_type = car_type if len(car_type.split()) == 0 else '_'.join(car_type.split())
  38.     if car_type not in os.listdir('images'):
  39.         os.mkdir('images/{}'.format(car_type))
  40.     else:
  41.         shutil.rmtree('images/{}'.format(car_type))
  42.         os.mkdir('images/{}'.format(car_type))
  43.        
  44.     results.append(pool.starmap(download_image, zip(links, [car_type for i in range(len(links))], num_plates)))
  45.     #for link, car_type, num_plate in tqdm(zip(links, [car_type for i in range(len(links))], num_plates)):
  46.     #    results.append(download_image(link, car_type, num_plate))
  47.  
  48. pd.DataFrame(np.vstack(results), columns=['path', 'class_name', 'num_plate']).to_csv('number_plate_images.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement