Advertisement
Guest User

Untitled

a guest
Jun 27th, 2019
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.38 KB | None | 0 0
  1. import os
  2. import cv2
  3. import numpy as np
  4. import pandas as pd
  5. import urllib.request
  6.  
  7. from tqdm import tqdm
  8.  
  9.  
  10. class DataDownloader:
  11. @staticmethod
  12. def url_to_image(url):
  13. req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
  14.  
  15. return cv2.imdecode(np.asarray(bytearray(urllib.request.urlopen(req).read()), dtype=np.uint8), -1)
  16.  
  17. def __init__(self, csv_path: str, dir2download: str):
  18. self.csv_path = csv_path
  19. self.dir2download = dir2download
  20. self.df = pd.read_csv(self.csv_path, usecols=["Name", "Photo"])
  21.  
  22. os.makedirs(dir2download, exist_ok=True)
  23.  
  24. def download_and_save(self, size: tuple = None):
  25. for d in tqdm(self.df.values, "Data Downloading..."):
  26. if not os.path.exists(f"{self.dir2download}/{d[0]}.jpg"):
  27. try:
  28. img = self.url_to_image(d[1])
  29. except:
  30. continue
  31.  
  32. if size is not None:
  33. img = cv2.resize(img, size)
  34. cv2.imwrite(f"{self.dir2download}/{d[0]}.jpg", img)
  35.  
  36. def rename_all(self):
  37. for i, img_p in enumerate(os.listdir(self.dir2download)):
  38. img_p = os.path.join(self.dir2download, img_p)
  39. os.rename(img_p, os.path.join(self.dir2download, f"{i}.jpg"))
  40.  
  41.  
  42. if __name__ == '__main__':
  43. dd = DataDownloader("data.csv", "dataset")
  44. dd.download_and_save()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement