Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import shutil
- import os
- from time import time
- import pandas as pd
- import numpy as np
- from multiprocessing.dummy import Pool as ThreadPool
- from tqdm import tqdm
# Location of the dataset file; the loaded object is used below as a mapping
# from a car-type name to the records of that type.
json_data = 'new_dataset.json'

# The file contains Cyrillic keys.  Decode it explicitly as UTF-8 (the JSON
# interchange encoding) instead of relying on the platform's locale encoding,
# which would mis-decode or fail on systems whose default is not UTF-8.
with open(json_data, 'r', encoding='utf-8') as f:
    car_dict = json.load(f)

# Re-file the Russian-named category under an ASCII key; a KeyError here means
# the dataset does not contain that category.
car_dict['automobile_type1'] = car_dict.pop('Автомобильные (тип 1)')

# Car-type names in dictionary order (the renamed key ends up last).
car_type_keys = list(car_dict)
def download_image(url: str, car_type: str, num_plate) -> tuple:
    """Fetch one image with ``wget`` and report where it was stored.

    The image is written to ``images/<car_type>/<NAME>.jpg``, where ``NAME``
    is ``str(num_plate)`` with spaces removed and converted to upper case.
    The target directory is not created here and must already exist.

    The download is best-effort: a failing or missing ``wget`` does not raise,
    so the returned path is not a guarantee that the file exists.

    Args:
        url: Address of the image to download.
        car_type: Name of the sub-directory of ``images`` to store the file in.
        num_plate: Plate number used to derive the file name.

    Returns:
        A ``(path, car_type, num_plate)`` tuple.  ``path`` is the destination
        WITHOUT the ``.jpg`` suffix, exactly as the original implementation
        returned it.  NOTE(review): the file on disk does carry ``.jpg`` --
        confirm whether consumers of this value append the extension.
    """
    # Imported locally because the module's import block does not provide them.
    import subprocess
    import sys

    path = 'images/{}'.format(car_type)
    name = str(num_plate).replace(' ', '').upper()
    destination = f'{path}/{name}.jpg'

    # Pass an argument list instead of a shell string: characters such as
    # '&', ';' or spaces in the URL or plate can no longer break or inject
    # into the command.  '--' stops wget from reading the URL as an option.
    try:
        subprocess.run(['wget', '-O', destination, '--', url], check=False)
    except OSError as error:
        # wget is missing or not executable.  os.system only produced a shell
        # error message in this case, so report it and carry on.
        print(f'wget could not be started: {error}', file=sys.stderr)

    return (f'{path}/{name}', car_type, num_plate)
# Download the first photo of every record, one directory per car type, and
# write an index of all downloads to 'number_plate_images.csv'.

# Flat list of (path, class_name, num_plate) tuples returned by download_image.
results = []

# Threads are sufficient because the work is waiting on downloads; the
# context manager releases the workers once every batch has finished
# (starmap blocks until its batch is complete).
with ThreadPool(100) as pool:
    for car_type in car_type_keys:
        dataset = pd.DataFrame(car_dict[car_type])

        # Keep only the first photo of each record and rewrite its URL from
        # the 'medium' variant to the 'original' one.
        dataset['car_photos'] = dataset['car_photos'].apply(
            lambda photos: photos[0].replace('/medium/', '/original/').replace('/m/', '/o/')
        )
        dataset.drop_duplicates(subset=['car_photos'], inplace=True)
        links = dataset['car_photos'].values
        num_plates = dataset['number'].values

        # Directory-safe class name: whitespace runs become '_'.  A name made
        # only of whitespace (or empty) is left untouched, as before.
        car_type = '_'.join(car_type.split()) or car_type

        # Start from an empty directory for this class.  Unlike
        # os.listdir('images'), this also works when 'images' does not exist.
        class_dir = 'images/{}'.format(car_type)
        shutil.rmtree(class_dir, ignore_errors=True)
        os.makedirs(class_dir)

        # extend() keeps `results` flat, so a class without any links simply
        # contributes nothing instead of breaking the final table.
        results.extend(
            pool.starmap(download_image, zip(links, [car_type] * len(links), num_plates))
        )

# Built directly from the tuples: works for an empty result set and does not
# coerce every value to a string first.
pd.DataFrame(results, columns=['path', 'class_name', 'num_plate']).to_csv('number_plate_images.csv', index=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement