Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import shutil
- import os
- from time import time
- import pandas as pd
- import numpy as np
- from multiprocessing.dummy import Pool as ThreadPool
# Load the scraped dataset. The top-level keys of the JSON object are used as
# car-type names by the rest of this script.
json_data = 'new_dataset.json'
# The file contains Cyrillic keys, so decode it explicitly as UTF-8 instead of
# relying on the platform's default encoding.
with open(json_data, 'r', encoding='utf-8') as f:
    car_dict = json.load(f)

# Re-key the Cyrillic-named category under an ASCII name so it can later be
# used as a directory / class name. Raises KeyError if the category is absent.
car_dict['automobile_type1'] = car_dict.pop('Автомобильные (тип 1)')
car_type_keys = list(car_dict.keys())
def download_image(url, car_type, num_plate):
    """Fetch *url* with ``wget`` into ``images/<car_type>/`` and describe the file.

    The download is best-effort: the return value is produced whether or not
    ``wget`` succeeded, so callers that need certainty must check that the
    returned path exists.

    Args:
        url: Address of the image to download.
        car_type: Name of the sub-directory of ``images`` to store the file in.
            The directory must already exist.
        num_plate: Value passed through unchanged into the result.

    Returns:
        A ``(path, car_type, num_plate)`` tuple where ``path`` is the location
        the image was written to (``images/<car_type>/<timestamp>.jpg``).
    """
    # Imported here so the function does not depend on edits to the file's
    # import section.
    import subprocess
    import sys

    path = 'images/{}'.format(car_type)
    # File name is the current time in microseconds.
    # NOTE(review): two threads calling this within the same microsecond would
    # get the same name - confirm this is acceptable for the pool size used.
    name = '{}.jpg'.format(str(int(time() * 1000000)))
    target = f'{path}/{name}'

    # Pass the arguments as a list (no shell): URLs routinely contain '&', '?'
    # or ';', which a shell command line would misinterpret or execute.
    # '--' stops option parsing so a URL starting with '-' is not an option.
    try:
        subprocess.run(['wget', '-O', target, '--', str(url)], check=False)
    except OSError as exc:
        # wget missing or not executable: report it and carry on, matching
        # the non-raising behaviour of the previous os.system call.
        print(f'could not run wget for {url}: {exc}', file=sys.stderr)

    return (target, car_type, num_plate)
# Download the first photo of every record, one car type at a time, and write
# an index of the stored images to number_plate_images.csv.

# The per-type directories are created inside 'images', so make sure the root
# exists before listing it.
os.makedirs('images', exist_ok=True)

results = []
# Downloads are I/O-bound, so a large thread pool overlaps the waiting time.
# The context manager releases the worker threads once every batch is done
# (starmap blocks until its batch has finished).
with ThreadPool(300) as pool:
    for car_type in car_type_keys:
        # Build the frame once and read both columns from it.
        records = pd.DataFrame(car_dict[car_type])
        links = records['car_photos'].apply(lambda photos: photos[0]).values
        num_plates = records['number'].values

        # Directory-friendly class name: runs of whitespace become single
        # underscores. A name without any non-whitespace character is kept
        # as it is.
        class_name = '_'.join(car_type.split()) or car_type
        class_dir = 'images/{}'.format(class_name)

        # Start every class from an empty directory, discarding images left
        # over from a previous run.
        if class_name in os.listdir('images'):
            shutil.rmtree(class_dir)
        os.mkdir(class_dir)

        # Collect the (path, class_name, num_plate) tuples as one flat list so
        # that an empty batch (or no batches at all) is still valid input for
        # the DataFrame below.
        results.extend(
            pool.starmap(
                download_image,
                zip(links, [class_name] * len(links), num_plates),
            )
        )

pd.DataFrame(results, columns=['path', 'class_name', 'num_plate']).to_csv(
    'number_plate_images.csv', index=False
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement