Guest User

Untitled

a guest
Mar 21st, 2018
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.84 KB | None | 0 0
  1. """ Downloading the Training data """
  2. import os
  3. from IPython.display import clear_output
  4. import pandas as pd # For manipulating CSV files
  5. import urllib.request # For downloading files from the provided links
  6. import time
  7. from termcolor import colored
  8.  
  9. test_dir='Test'
  10. train_dir='Train'
  11.  
  12. traincsv = pd.read_csv('myntra_train_dataset.csv')
  13. testcsv = pd.read_csv('myntra_test.csv')
  14.  
  15. if not os.path.exists(train_dir):
  16. os.mkdir(train_dir)
  17. start = time.time()
  18. for i in range(traincsv.shape[0]):
  19. link = traincsv.iloc[i]['Link_to_the_image']
  20. name = (traincsv.iloc[i]['Sub_category'])
  21. full_name = name+'_'+str(i)+'.jpg'
  22. img_name = full_name
  23. full_name = os.path.join(train_dir, img_name)
  24. if not os.path.exists(full_name):
  25. try:
  26. clear_output(wait=True)
  27. urllib.request.urlretrieve(link, full_name)
  28. print(colored(img_name+' downloaded', 'green'))
  29. except:
  30. clear_output(wait=True)
  31. print(colored('Link Missing', color='red'))
  32. else:
  33. clear_output(wait=True)
  34. print(img_name,' has already been downloaded')
  35. end = time.time()
  36. print('Time taken: ', end-start)
  37.  
  38.  
  39.  
  40.  
  41. """ Downloading the Testing data """
  42.  
  43. if not os.path.exists(test_dir):
  44. os.mkdir(test_dir)
  45. start = time.time()
  46. for i in range(402, testcsv.shape[0]):
  47. link = traincsv.iloc[i]['Link_to_the_image']
  48. name = str(i)+'.jpg'
  49. full_name = os.path.join(test_dir, name)
  50. if not os.path.exists(full_name):
  51. try:
  52. clear_output(wait=True)
  53. urllib.request.urlretrieve(link, full_name)
  54. print(colored(name+' downloaded', 'green'))
  55. except:
  56. clear_output(wait=True)
  57. print(colored('Link Missing', color='red'))
  58. else:
  59. clear_output(wait=True)
  60. print(name, ' has already been downloaded')
  61. end = time.time()
  62. print('Time taken: ', end-start)
Add Comment
Please, Sign In to add comment