SHARE
TWEET

Untitled

a guest Jun 27th, 2019 69 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import os
  2. import cv2
  3. import numpy as np
  4. import pandas as pd
  5. import urllib.request
  6.  
  7. from tqdm import tqdm
  8.  
  9.  
  10. class DataDownloader:
  11.     @staticmethod
  12.     def url_to_image(url):
  13.         req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
  14.  
  15.         return cv2.imdecode(np.asarray(bytearray(urllib.request.urlopen(req).read()), dtype=np.uint8), -1)
  16.  
  17.     def __init__(self, csv_path: str, dir2download: str):
  18.         self.csv_path = csv_path
  19.         self.dir2download = dir2download
  20.         self.df = pd.read_csv(self.csv_path, usecols=["Name", "Photo"])
  21.  
  22.         os.makedirs(dir2download, exist_ok=True)
  23.  
  24.     def download_and_save(self, size: tuple = None):
  25.         for d in tqdm(self.df.values, "Data Downloading..."):
  26.             if not os.path.exists(f"{self.dir2download}/{d[0]}.jpg"):
  27.                 try:
  28.                     img = self.url_to_image(d[1])
  29.                 except:
  30.                     continue
  31.  
  32.                 if size is not None:
  33.                     img = cv2.resize(img, size)
  34.                 cv2.imwrite(f"{self.dir2download}/{d[0]}.jpg", img)
  35.  
  36.     def rename_all(self):
  37.         for i, img_p in enumerate(os.listdir(self.dir2download)):
  38.             img_p = os.path.join(self.dir2download, img_p)
  39.             os.rename(img_p, os.path.join(self.dir2download, f"{i}.jpg"))
  40.  
  41.  
  42. if __name__ == '__main__':
  43.     dd = DataDownloader("data.csv", "dataset")
  44.     dd.download_and_save()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top