Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import logging
import os
import urllib.request
import uuid

import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
class DataDownloader:
    """Download the images listed in a CSV file into a local directory.

    The CSV must contain a ``Name`` column (used as the output file stem)
    and a ``Photo`` column (the URL the image is fetched from).
    """

    @staticmethod
    def url_to_image(url, timeout=30.0):
        """Fetch *url* and decode the response body with OpenCV.

        Args:
            url: Address of the image to download.
            timeout: Seconds to wait on the network before giving up, so a
                single stalled host cannot block a whole batch.

        Returns:
            The result of ``cv2.imdecode`` on the downloaded bytes; per the
            OpenCV documentation this is ``None`` when the payload is not a
            decodable image.

        Raises:
            Any error raised by ``urllib.request`` while building the request
            or reading the response.
        """
        # A browser-like User-Agent is sent instead of urllib's default;
        # presumably some image hosts reject the default one.
        req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            payload = response.read()
        return cv2.imdecode(np.frombuffer(payload, dtype=np.uint8), cv2.IMREAD_UNCHANGED)

    def __init__(self, csv_path: str, dir2download: str):
        """Load the CSV and make sure the download directory exists.

        Args:
            csv_path: Path of the CSV file with ``Name`` and ``Photo`` columns.
            dir2download: Directory the images are written to; created if
                it does not exist yet.
        """
        self.csv_path = csv_path
        self.dir2download = dir2download
        # read_csv ignores the element order of `usecols` and keeps the
        # file's own column order, so select the columns explicitly to
        # guarantee (Name, Photo) ordering.
        frame = pd.read_csv(self.csv_path, usecols=["Name", "Photo"])
        self.df = frame[["Name", "Photo"]]
        os.makedirs(dir2download, exist_ok=True)

    def download_and_save(self, size: tuple = None):
        """Download every image that is not on disk yet and save it as JPEG.

        Rows whose target file ``<dir2download>/<Name>.jpg`` already exists
        are skipped. A row that fails to download, decode, resize or save is
        logged as a warning and skipped, so one bad row never aborts the
        batch.

        Args:
            size: Optional size passed to ``cv2.resize`` before saving;
                ``None`` keeps the decoded size.
        """
        log = logging.getLogger(__name__)
        # Address columns by label rather than by position.
        rows = zip(self.df["Name"], self.df["Photo"])
        for name, url in tqdm(rows, "Data Downloading...", total=len(self.df)):
            target = os.path.join(self.dir2download, f"{name}.jpg")
            if os.path.exists(target):
                continue
            try:
                img = self.url_to_image(url)
                if img is None:
                    log.warning("Skipping %r: %s is not a decodable image", name, url)
                    continue
                if size is not None:
                    img = cv2.resize(img, size)
                if not cv2.imwrite(target, img):
                    log.warning("Could not write %s", target)
            except Exception as exc:
                # Best-effort batch: report the failure and move on.
                # KeyboardInterrupt / SystemExit still propagate.
                log.warning("Skipping %r (%s): %s", name, url, exc)

    def rename_all(self):
        """Rename every entry of the download directory to ``<index>.jpg``.

        Entries are numbered in sorted-name order. Renaming happens in two
        passes through unique temporary names, so an entry that is already
        called e.g. ``0.jpg`` is never overwritten by another entry that is
        assigned that number.
        """
        entries = sorted(os.listdir(self.dir2download))
        token = uuid.uuid4().hex
        staged = []
        # Pass 1: move everything out of the way of the final names.
        for index, entry in enumerate(entries):
            temp_path = os.path.join(self.dir2download, f".{token}-{index}.tmp")
            os.rename(os.path.join(self.dir2download, entry), temp_path)
            staged.append(temp_path)
        # Pass 2: assign the final sequential names.
        for index, temp_path in enumerate(staged):
            os.rename(temp_path, os.path.join(self.dir2download, f"{index}.jpg"))
# Script entry point: read data.csv from the working directory and download
# every listed image into ./dataset at its decoded size.
if __name__ == "__main__":
    downloader = DataDownloader(csv_path="data.csv", dir2download="dataset")
    downloader.download_and_save(size=None)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement