Advertisement
Guest User

Untitled

a guest
Apr 24th, 2017
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.12 KB | None | 0 0
  1. import numpy as np
  2. import os
  3. import tarfile
  4. from scipy.ndimage import imread
  5. from keras.utils import np_utils
  6.  
  def read_data(path='notMNIST.tar.gz'):
      """Load the PNG images of a gzipped tar archive, grouped by class.

      The class index of an image is derived from the name of the directory
      that directly contains it: ``ord(dirname) - ord('A')``, so a directory
      named ``A`` maps to class 0, ``B`` to class 1, and so on.

      The number of archive members visited so far is printed every 2000
      members as a progress indicator.

      Args:
          path: Location of the ``.tar.gz`` archive to read. Defaults to
              ``'notMNIST.tar.gz'`` (relative to the working directory).

      Returns:
          A tuple ``(bad_pictures, img_shape, data)``:

          * ``bad_pictures`` -- number of regular files that were skipped
            because they are not ``.png`` files, are empty, or could not
            be decoded.
          * ``img_shape`` -- ``shape`` of the first successfully decoded
            image, or ``None`` when no image was decoded.
          * ``data`` -- dict mapping each class index to a pair
            ``(images, labels)``, where ``images`` is ``np.array`` over the
            decoded images of that class and ``labels`` is the result of
            ``np_utils.to_categorical`` for that class index.

      Raises:
          TypeError: If a picture's parent directory name is not a single
              character (``ord`` rejects it).
      """
      images_by_class = {}
      img_shape = None
      bad_pictures = 0
      print("#pictures:")
      with tarfile.open(path, 'r:gz') as tar_file:
          for cnt, f in enumerate(tar_file):
              if cnt % 2000 == 0:
                  print(cnt)

              # Directories, links, etc. are not pictures at all: skip them
              # without inflating the bad-picture count.
              if not f.isfile():
                  continue

              if os.path.splitext(f.name)[1] != '.png' or f.size <= 0:
                  bad_pictures += 1
                  continue

              clazz = ord(os.path.split(os.path.dirname(f.name))[1]) - ord('A')

              # Decode before touching the per-class list, so a class whose
              # only picture is unreadable does not end up as an empty entry.
              try:
                  img = imread(tar_file.extractfile(f))
              except (IOError, OSError):
                  # Non-empty but corrupt file: count it and keep going
                  # instead of aborting the whole load.
                  bad_pictures += 1
                  continue

              if img_shape is None:
                  img_shape = img.shape
              images_by_class.setdefault(clazz, []).append(img)

      # NOTE(review): the label width is the number of distinct classes seen;
      # this presumably relies on class directories being contiguous letters
      # starting at 'A' -- confirm against the archive layout.
      nb_classes = len(images_by_class)
      data = {}
      for clazz, images in images_by_class.items():
          labels = np_utils.to_categorical([clazz] * len(images), nb_classes)
          data[clazz] = (np.array(images), labels)

      return (bad_pictures, img_shape, data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement