# -*- coding: utf-8 -*-
from __future__ import print_function, division
from theano.sandbox import cuda
import utils_console; reload(utils_console)
from utils_console import *
from IPython.display import FileLink
cuda.use('gpu0')
# path = "data/state/sample/"
# path = "data/state.tiny/"
path = "data/state/"
# batch_size = 16
# batch_size = 64
batch_size = 48
# (val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames, test_filenames) = get_classes(path)
# Rather than using batches, we could import all the data into arrays up front to save some processing time.
# (Most examples here use the batches, though - just because that's how I happened to start out.)
########################################
# exit()
########################################
- _="""
- trn = get_data(path+'train')
- val = get_data(path+'valid')
- save_array(path+'results/val.dat', val)
- save_array(path+'results/trn.dat', trn)
- val = load_array(path+'results/val.dat')
- trn = load_array(path+'results/trn.dat')
- """
# Since we have so little data, and it is similar to ImageNet images (full color photos), using
# pre-trained VGG weights is likely to be helpful - in fact it seems likely that we won't need to
# fine-tune the convolutional layer weights much, if at all. So we can pre-compute the output of
# the last convolutional layer, as we did in lesson 3 when we experimented with dropout.
# (However, this means we can't use full data augmentation, since we can't pre-compute something
# that changes with every image.)
vgg = Vgg16()
model = vgg.model
last_conv_idx = [i for i, l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]
conv_model = Sequential(conv_layers)
(val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames, test_filenames) = get_classes(path)
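# Sanity check: cutting VGG16 at the last Convolution2D (i.e. before the final
# max-pooling) leaves 512 feature maps at 14x14 under Theano dim ordering, so the
# precomputed features below should be shaped (n_samples, 512, 14, 14).
print("conv_model output shape:", conv_model.output_shape)  # (None, 512, 14, 14)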
- print("Executing: conv_model.predict_generator(batches, batches.nb_sample)")
- ###!!!batches = get_batches(path+'train', batch_size=batch_size)
- ###!!!conv_feat = conv_model.predict_generator(batches, batches.nb_sample)
- ###!!!save_array(path+'results/conv_feat.dat', conv_feat) # conv_feat = load_array(path+'results/conv_feat.dat')
- ######!!!
- conv_feat = load_array(path+'results/conv_feat.dat')
- ######!!!
- print("Executing: conv_model.predict_generator(val_batches, val_batches.nb_sample)")
- ###!!!val_batches = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
- ###!!!conv_val_feat = conv_model.predict_generator(val_batches, val_batches.nb_sample)
- ###!!!save_array(path+'results/conv_val_feat.dat', conv_val_feat) # conv_val_feat = load_array(path+'results/conv_val_feat.dat')
- ######!!!
- conv_val_feat = load_array(path+'results/conv_val_feat.dat')
- ######!!!
- print("Executing: conv_model.predict_generator(test_batches, test_batches.nb_sample)")
- ###!!!test_batches = get_batches(path+'test', batch_size=batch_size)
- ###!!!conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
- ###!!!save_array(path+'results/conv_test_feat.dat', conv_test_feat) # load_array(path+'results/conv_test_feat.dat')
- ######!!!
- conv_test_feat = load_array(path+'results/conv_test_feat.dat')
- ######!!!
# ### Pre-computed data augmentation + dropout
# We'll use our usual data augmentation parameters:
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, shear_range=0.1,
                                 channel_shift_range=20, width_shift_range=0.1)
da_batches = get_batches(path+'train', gen_t, batch_size=batch_size, shuffle=False)
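# Optional sanity check (illustrative): pull one augmented batch and eyeball it
# before committing to a long precompute run; plots() is from the course utils.
# imgs, labels = next(da_batches)
# plots(imgs[:8], titles=labels[:8])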
# The original notebook builds a dataset of convolutional features 5x bigger than the
# training set; this run uses 2x (the 'da_conv_feat2a' cache) to save time.
###!!!da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*5)
###!!!save_array(path+'results/da_conv_feat2.dat', da_conv_feat)  # da_conv_feat = load_array(path+'results/da_conv_feat2.dat')
######!!!
###da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*2)
###save_array(path+'results/da_conv_feat2a.dat', da_conv_feat)  # da_conv_feat = load_array(path+'results/da_conv_feat2a.dat')
da_conv_feat = load_array(path+'results/da_conv_feat2a.dat')
######!!!
# Let's include the real training data as well, in its non-augmented form.
da_conv_feat = np.concatenate([da_conv_feat, conv_feat])
# With 2x augmented features plus the originals, the dataset is now 3x bigger than
# before (the original notebook's 5x run would make it 6x), so the labels need to be
# copied 3 times to match.
###!!!da_trn_labels = np.concatenate([trn_labels]*6)
######!!!
da_trn_labels = np.concatenate([trn_labels]*3)
######!!!
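# The augmented features and the replicated labels must line up one-to-one.
assert da_conv_feat.shape[0] == da_trn_labels.shape[0]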
# Based on some experiments, the architecture from the previous model works well here,
# with bigger dense layers.
def get_bn_da_layers(bn, dense):
    # 'bn' is the dropout probability, 'dense' the width of the two hidden layers.
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(bn),
        Dense(dense, activation='relu'),
        BatchNormalization(),
        Dropout(bn),
        Dense(dense, activation='relu'),
        BatchNormalization(),
        Dropout(bn),
        Dense(10, activation='softmax')
    ]
#--------------------------------------------------------------
def fitx(bn, dense, lr, epochs):
    print("Fitting:", bn, dense, lr, epochs)
    x_model = Sequential(get_bn_da_layers(bn, dense))
    x_model.compile(Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    x_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=epochs,
                validation_data=(conv_val_feat, val_labels))
    return x_model
#--------------------------------------------------------------
bn_model = fitx(0.6, 640, 0.000004, 12)
- _="""
- bn_model = Sequential(get_bn_da_layers(0.8, 256))
- bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])
- # Now we can train the model as usual, with pre-computed augmented data.
- bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=1, validation_data=(conv_val_feat, val_labels))
- bn_model.optimizer.lr=0.01
- bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))
- bn_model.optimizer.lr=0.0001
- bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))
- """
- # Looks good - let's save those weights.
- bn_model.save_weights(path+'models/da_conv8_1.h5')
- _="""
- # ### Pseudo labeling
- # We're going to try using a combination of [pseudo labeling](http://deeplearning.net/wp-content/uploads/2013/03/pseudo_label_final.pdf) and [knowledge distillation](https://arxiv.org/abs/1503.02531) to allow us to use unlabeled data (i.e. do semi-supervised learning). For our initial experiment we'll use the validation set as the unlabeled data, so that we can see that it is working without using the test set. At a later date we'll try using the test set.
- # To do this, we simply calculate the predictions of our model...
- val_pseudo = bn_model.predict (conv_val_feat, batch_size=batch_size)
- # ...concatenate them with our training labels...
- comb_pseudo = np.concatenate ([da_trn_labels, val_pseudo])
- comb_feat = np.concatenate ([da_conv_feat, conv_val_feat])
- # ...and fine-tune our model using that data.
- bn_model.load_weights(path+'models/da_conv8_1.h5')
- bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=1, validation_data=(conv_val_feat, val_labels))
- bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))
- bn_model.optimizer.lr=0.00001
- bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))
- # That's a distinct improvement - even although the validation set isn't very big. This looks encouraging for when we try this on the test set.
- bn_model.save_weights(path+'models/bn-ps8.h5')
- """
# ### Submit
# We'll find a good clipping amount using the validation set, prior to submitting.
def do_clip(arr, mx):
    return np.clip(arr, (1-mx)/9, mx)
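# Worked example: with mx=0.93 and 10 classes the lower bound is (1-0.93)/9 ~= 0.0078,
# so a one-hot row [1, 0, ..., 0] becomes [0.93, 0.0078, ..., 0.0078]. Clipping caps
# the log-loss penalty for confidently wrong predictions.
# do_clip(np.eye(10)[:1], 0.93)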
###########################
# The original notebook computed 'val_preds' in an earlier cell; recreate it here.
val_preds = bn_model.predict(conv_val_feat, batch_size=batch_size)
###########################
x = keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.93)).eval()
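# To actually search for a good clip value (0.93 above is taken as given), one could
# sweep a few candidates with the same eval() trick -- an illustrative sketch:
# for mx in [0.82, 0.88, 0.90, 0.93, 0.95, 0.98]:
#     loss = keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, mx)).eval().mean()
#     print(mx, loss)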
# conv_test_feat was already loaded above from results/conv_test_feat.dat.
preds = bn_model.predict(conv_test_feat, batch_size=batch_size*2)
# preds.shape == (46, 10) on this run's (tiny) test set, e.g.:
# array([[ 0.089 ,  0.264 ,  0.058 ,  0.151 ,  0.0669,  0.0292,  0.1107,  0.0357,  0.0402,  0.1553],
#        [ 0.0349,  0.1944,  0.0846,  0.0641,  0.0468,  0.0275,  0.4069,  0.0507,  0.0507,  0.0395], ...
subm = do_clip(preds, 0.93)  # clipped copy of 'preds'
subm_name = path+'results/subm.gz'
batches = get_batches(path+'train', batch_size=batch_size)
classes = sorted(batches.class_indices, key=batches.class_indices.get)  # class names in index order
submission = pd.DataFrame(subm, columns=classes)
submission.insert(0, 'img', [a[4:] for a in test_filenames])
print(submission.head())
submission.to_csv(subm_name, index=False, compression='gzip')
# FileLink(subm_name)
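# Expected State Farm submission format: a header row 'img,c0,c1,...,c9' followed by
# one row of clipped class probabilities per test image.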