Guest User

Untitled

a guest
Jan 16th, 2018
104
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.03 KB | None | 0 0
  1. import os
  2. import io
  3. import struct
  4. import numpy as np
  5.  
  6. def load():
  7.  
  8. # loading into signed floats even though data in file is unisgned byte
  9. # need negative values and floats for mean normalization
  10. dtype = np.int8
  11.  
  12. # constants, defined as per the idx file format in use
  13. # labels file - 4 bytes magic no, 4 bytes number of rows, rest data
  14. # image file - 4 bytes magic no, 4 bytes number of items,
  15. # 4 bytes number of rows, 4 bytes number of columns. Rest data
  16. header_bytes = 4
  17. label_bytes = 1
  18. picture_bytes = 1
  19. num_classes = 10
  20.  
  21. # data dir. Project directory must be arranged as <project dir>/source/<py file>, and <project dir/data/two files below
  22. data_dir = "/data/"
  23. label_file = "train-labels-idx1-ubyte"
  24. image_file = "train-images-idx3-ubyte"
  25.  
  26. dir_path = os.path.abspath('..')+data_dir
  27. label_filename = os.path.join(dir_path, label_file)
  28. image_filename = os.path.join(dir_path, image_file)
  29.  
  30. print("Reading MNIST label data")
  31.  
  32. with open(label_filename, "rb") as label_mnist_file:
  33. #big endian file format
  34. label_magic_num, label_num_items = struct.unpack (
  35. ">II", label_mnist_file.read(8))
  36.  
  37. y = np.fromfile( label_mnist_file, dtype, -1, "")
  38.  
  39. #sanity check on collected data
  40. print("Finished reading MNIST labels, label array size %s"
  41. %str((y.shape)))
  42.  
  43. print("Reading MNIST image data")
  44.  
  45. with open(image_filename, "rb") as image_mnist_file:
  46. #big endian file format
  47. image_magic_num, image_num_items, \
  48. image_num_rows, image_num_columns = struct.unpack(
  49. ">IIII", image_mnist_file.read(16))
  50. # cannot change this statement, have to read from file into linear array
  51. X = np.fromfile ( image_mnist_file, dtype, -1, "")
  52.  
  53. X = np.reshape ( X, (784,60000))
  54.  
  55.  
  56. #sanity check
  57. print("Finished reading MNIST images, image array size %s" %(str(X.shape)))
  58.  
  59. return X.astype(np.float32), y.astype(np.float32), num_classes
Add Comment
Please, Sign In to add comment