Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import os
- import tarfile
- from scipy.ndimage import imread
- from keras.utils import np_utils
def read_data():
    """Load the PNG images stored in ``notMNIST.tar.gz``, grouped by class.

    The archive is read from the current working directory.  Each usable
    member must sit in a directory whose name is a single character; the
    class index is that character's offset from ``'A'`` (``A`` -> 0,
    ``B`` -> 1, ...).  Progress (the number of members visited so far) is
    printed every 2000 members.

    Returns:
        A tuple ``(bad_pictures, img_shape, data)`` where

        * ``bad_pictures`` is the number of archive members that were
          skipped because their extension is not ``.png`` or their size is
          not positive.  Any non-PNG member (a directory entry, for
          example) is counted here as well.
        * ``img_shape`` is the shape of the first image that was decoded,
          or ``None`` if no image was decoded.
        * ``data`` maps each class index to a pair
          ``(images, labels)``: ``images`` is ``np.array`` of the decoded
          pictures of that class and ``labels`` is the result of
          ``np_utils.to_categorical`` for that class index, one row per
          picture.

    Raises:
        TypeError: from ``ord`` if a ``.png`` member's parent directory
            name is not exactly one character long.
        Exception: whatever ``tarfile.open`` or ``imread`` raise for a
            missing archive or an undecodable picture is propagated.
    """
    # NOTE(review): ``scipy.ndimage.imread`` (imported at file level) is not
    # available in recent SciPy releases -- confirm the pinned version.
    data = {}
    img_shape = None
    bad_pictures = 0

    print("#pictures:")
    with tarfile.open('notMNIST.tar.gz', 'r:gz') as tar_file:
        for cnt, member in enumerate(tar_file):
            if cnt % 2000 == 0:
                print(cnt)

            extension = os.path.splitext(member.name)[1]
            if extension != '.png' or member.size <= 0:
                bad_pictures += 1
                continue

            # The parent directory's name is the class letter.
            letter = os.path.basename(os.path.dirname(member.name))
            clazz = ord(letter) - ord('A')

            img = imread(tar_file.extractfile(member))
            if img_shape is None:
                img_shape = img.shape
            data.setdefault(clazz, []).append(img)

    if data:
        # The label width must cover the largest class index, not just the
        # number of distinct classes; otherwise a missing letter directory
        # makes to_categorical index past the end of its rows.  Never
        # narrower than len(data), so contiguous classes are unaffected.
        nb_classes = max(len(data), max(data) + 1)
        for clazz, images in data.items():
            labels = np_utils.to_categorical([clazz] * len(images), nb_classes)
            data[clazz] = (np.array(images), labels)

    return (bad_pictures, img_shape, data)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement