Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
import multiprocessing.pool
import os
from functools import partial

import h5py
import numpy as np
from keras import backend as K
from keras.preprocessing.image import (
    ImageDataGenerator,
    Iterator,
    _count_valid_files_in_directory,
    _list_valid_filenames_in_directory,
)
class MatFileIterGenerator(object):
    """Factory that produces :class:`MatFilesIterator` instances.

    Mirrors the ``ImageDataGenerator.flow_from_directory`` entry point, but
    yields batches read from ``.mat`` files instead of image files.
    """

    def __init__(self):
        # A plain ImageDataGenerator is kept only so the iterator can reuse
        # its random_transform/standardize pipeline.
        self.image_data_generator = ImageDataGenerator()

    def flow_from_directory(self, directory, variable,
                            target_size=(256, 256), classes=None, class_mode='categorical',
                            batch_size=32, shuffle=True, seed=None,
                            follow_links=False,
                            interpolation='nearest'):
        """Build an iterator over the class subfolders of *directory*.

        ``variable`` names the dataset to read from each ``.mat`` file.
        NOTE(review): ``target_size`` is accepted for API compatibility but is
        not forwarded to the iterator — confirm this is intentional.
        """
        iterator_options = dict(
            classes=classes,
            class_mode=class_mode,
            batch_size=batch_size,
            shuffle=shuffle,
            seed=seed,
            follow_links=follow_links,
            interpolation=interpolation,
        )
        return MatFilesIterator(
            directory, variable, self.image_data_generator, **iterator_options)
class MatFilesIterator(Iterator):
    """Keras ``Iterator`` over ``.mat`` (HDF5) files laid out in class subfolders.

    Directory layout is the same as ``ImageDataGenerator.flow_from_directory``:
    one subdirectory per class, each containing ``.mat`` files. Each file is
    opened with h5py and the dataset named by ``variable`` is read as the
    sample array.
    """

    def __init__(self, directory, variable, image_data_generator, classes=None, class_mode="categorical",
                 batch_size=32, shuffle=True, seed=None, interpolation='nearest', follow_links=False):
        # `variable` is the HDF5 dataset name read from every .mat file.
        self.variable = variable
        self.directory = directory
        # Reused only for random_transform()/standardize() in batch building.
        self.image_data_generator = image_data_generator
        self.data_format = K.image_data_format()
        self.classes = classes
        # Validate class_mode up front, mirroring keras' DirectoryIterator.
        if class_mode not in {'categorical', 'binary', 'sparse',
                              'input', None}:
            raise ValueError('Invalid class_mode:', class_mode,
                             '; expected one of "categorical", '
                             '"binary", "sparse", "input"'
                             ' or None.')
        self.class_mode = class_mode
        self.interpolation = interpolation
        # Only .mat files count as valid samples.
        white_list_formats = {"mat"}
        # First pass: count the number of samples and discover the classes.
        self.samples = 0
        if not classes:
            # No explicit class list: every subdirectory becomes a class,
            # sorted so class indices are deterministic across runs.
            classes = []
            for subdir in sorted(os.listdir(directory)):
                if os.path.isdir(os.path.join(directory, subdir)):
                    classes.append(subdir)
        self.num_classes = len(classes)
        self.class_indices = dict(zip(classes, range(len(classes))))
        # NOTE(review): the two helpers below are keras *private* functions;
        # they may move or change signature between keras versions.
        pool = multiprocessing.pool.ThreadPool()
        function_partial = partial(_count_valid_files_in_directory,
                                   white_list_formats=white_list_formats,
                                   follow_links=follow_links)
        # Count files per class subfolder in parallel.
        self.samples = sum(pool.map(function_partial,
                                    (os.path.join(directory, subdir)
                                     for subdir in classes)))
        print('Found %d files belonging to %d classes.' % (self.samples, self.num_classes))
        # Second pass: build an index of the files in the class subfolders.
        results = []
        self.filenames = []
        # self.classes is rebound here from the constructor arg to the
        # per-sample label array (same behavior as keras' DirectoryIterator).
        self.classes = np.zeros((self.samples,), dtype='int32')
        i = 0
        for dirpath in (os.path.join(directory, subdir) for subdir in classes):
            results.append(pool.apply_async(_list_valid_filenames_in_directory,
                                            (dirpath, white_list_formats,
                                             self.class_indices, follow_links)))
        # Collect results in submission order so labels stay aligned
        # with filenames.
        for res in results:
            classes, filenames = res.get()
            self.classes[i:i + len(classes)] = classes
            self.filenames += filenames
            i += len(classes)
        pool.close()
        pool.join()
        super(MatFilesIterator, self).__init__(self.samples, batch_size, shuffle, seed)

    def _get_batches_of_transformed_samples(self, index_array):
        """Load, transform and stack the samples selected by *index_array*.

        Returns ``batch_x`` alone when ``class_mode`` is None, otherwise
        ``(batch_x, batch_y)``.
        """
        # The script fails here with mentioned error.
        # 60 is the mentioned constant row count, so batch_x is shaped
        # (batch, 60) — i.e. one 1-D row of 60 values per sample.
        # NOTE(review): if the .mat dataset is 2-D (60 x variable-width, as
        # the Conv2D input_shape elsewhere suggests), the assignment
        # `batch_x[i] = arr` below cannot work — confirm the expected shape.
        batch_x = np.zeros(tuple([len(index_array)] + [60]), dtype=K.floatx())
        # Build batch of numpy data.
        for i, j in enumerate(index_array):
            fname = self.filenames[j]
            # NOTE(review): the h5py.File handle is never closed — consider
            # a `with` block.
            arr = np.array(h5py.File(os.path.join(self.directory, fname), "r").get(self.variable))
            arr = self.image_data_generator.random_transform(arr.astype(K.floatx()))
            arr = self.image_data_generator.standardize(arr)
            batch_x[i] = arr
        # Build batch of labels according to class_mode.
        if self.class_mode == 'input':
            batch_y = batch_x.copy()
        elif self.class_mode == 'sparse':
            batch_y = self.classes[index_array]
        elif self.class_mode == 'binary':
            batch_y = self.classes[index_array].astype(K.floatx())
        elif self.class_mode == 'categorical':
            # One-hot encode the integer labels.
            batch_y = np.zeros((len(batch_x), self.num_classes), dtype=K.floatx())
            for i, label in enumerate(self.classes[index_array]):
                batch_y[i, label] = 1.
        else:
            return batch_x
        return batch_x, batch_y

    def next(self):
        """For python 2.x.

        # Returns
            The next batch.
        """
        with self.lock:
            index_array = next(self.index_generator)
        # The transformation of the samples is not under the thread lock
        # so it can be done in parallel.
        return self._get_batches_of_transformed_samples(index_array)
- from keras.layers import Activation, Conv2D, MaxPooling2D, GlobalMaxPooling2D, Dense, Dropout
- from keras.models import Sequential
- from matfileiter import MatFileIterGenerator
class CNN2D:
    """Three-stage 2-D convolutional network for binary classification.

    Input is expected as (60, variable-width, 1); GlobalMaxPooling2D makes
    the dense head independent of the variable width.
    """

    def __init__(self):
        net = Sequential()
        # First conv stage declares the input shape.
        net.add(Conv2D(60, (3, 3), input_shape=(60, None, 1)))
        net.add(Activation("relu"))
        net.add(MaxPooling2D(pool_size=(3, 3)))
        # Remaining conv stages differ only in filter count.
        for n_filters in (60, 120):
            net.add(Conv2D(n_filters, (3, 3)))
            net.add(Activation("relu"))
            net.add(MaxPooling2D(pool_size=(3, 3)))
        # Collapse the spatial dims, then classify.
        net.add(GlobalMaxPooling2D())
        net.add(Dense(120))
        net.add(Activation('relu'))
        net.add(Dropout(0.2))
        net.add(Dense(1))
        net.add(Activation('sigmoid'))
        net.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
        self._model = net

    def createGenerators(self, train_path, variable, test_path, batch_size):
        """Create the training and validation .mat-file generators."""
        def _make_generator(path):
            # One factory per generator, matching the original behavior.
            return MatFileIterGenerator().flow_from_directory(
                path,
                variable,
                shuffle=True,
                batch_size=batch_size,
                class_mode="binary")

        self._train_generator = _make_generator(train_path)
        self._test_generator = _make_generator(test_path)

    def train_model(self, batch_size):
        """Train for 50 epochs using the generators built above."""
        self._model.fit_generator(
            self._train_generator,
            steps_per_epoch=2000 // batch_size,
            epochs=50,
            validation_data=self._test_generator,
            validation_steps=800 // batch_size,
            workers=2,
            use_multiprocessing=True)
if __name__ == '__main__':
    # Single source of truth for the batch size (was duplicated as 20/20).
    BATCH_SIZE = 20
    cnn = CNN2D()
    # Bug fix: the eval path was misspelled "/home/wilosn/..." while the
    # train path used "/home/wilson/..." — unified on "wilson".
    cnn.createGenerators("/home/wilson/Documents/Data/_train_mat", "coeffs",
                         "/home/wilson/Documents/Data/_eval_mat", BATCH_SIZE)
    cnn.train_model(BATCH_SIZE)
Add Comment
Please sign in to add a comment.