Untitled

# -*- coding: utf-8 -*-
from __future__ import print_function, division

from theano.sandbox import cuda
#path = "data/state/sample/"
import utils_console; reload(utils_console)
from utils_console import *
from IPython.display import FileLink

cuda.use('gpu0')

# path = "data/state.tiny/"
path = "data/state/"

# batch_size=16
# batch_size=64
batch_size=48


# (val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames, test_filenames) = get_classes(path)

# Rather than using batches, we could just import all the data into an array to save some processing time.
# (In most examples I'm using the batches, however - just because that's how I happened to start out.)

########################################
#exit();
########################################

_="""
trn = get_data(path+'train')
val = get_data(path+'valid')

save_array(path+'results/val.dat', val)
save_array(path+'results/trn.dat', trn)

val = load_array(path+'results/val.dat')
trn = load_array(path+'results/trn.dat')
"""

# Since we have so little data, and it is similar to imagenet images (full color photos), using pre-trained VGG weights is likely to be helpful -
# in fact it seems likely that we won't need to fine-tune the convolutional layer weights much, if at all.
# So we can pre-compute the output of the last convolutional layer, as we did in lesson 3 when we experimented with dropout.
# (However this means that we can't use full data augmentation, since we can't pre-compute something that changes every image.)

vgg = Vgg16()
model=vgg.model
last_conv_idx = [i for i,l in enumerate(model.layers) if type(l) is Convolution2D][-1]
conv_layers = model.layers[:last_conv_idx+1]

conv_model = Sequential(conv_layers)

(val_classes, trn_classes, val_labels, trn_labels, val_filenames, filenames, test_filenames) = get_classes(path)

print("Executing: conv_model.predict_generator(batches, batches.nb_sample)")
###!!!batches      = get_batches(path+'train', batch_size=batch_size)
###!!!conv_feat      = conv_model.predict_generator(batches, batches.nb_sample)
###!!!save_array(path+'results/conv_feat.dat', conv_feat)             # conv_feat = load_array(path+'results/conv_feat.dat')
######!!!
conv_feat = load_array(path+'results/conv_feat.dat')
######!!!

print("Executing: conv_model.predict_generator(val_batches, val_batches.nb_sample)")
###!!!val_batches  = get_batches(path+'valid', batch_size=batch_size*2, shuffle=False)
###!!!conv_val_feat  = conv_model.predict_generator(val_batches, val_batches.nb_sample)
###!!!save_array(path+'results/conv_val_feat.dat', conv_val_feat)     # conv_val_feat = load_array(path+'results/conv_val_feat.dat')
######!!!
conv_val_feat = load_array(path+'results/conv_val_feat.dat')
######!!!

print("Executing: conv_model.predict_generator(test_batches, test_batches.nb_sample)")
###!!!test_batches = get_batches(path+'test',  batch_size=batch_size)
###!!!conv_test_feat = conv_model.predict_generator(test_batches, test_batches.nb_sample)
###!!!save_array(path+'results/conv_test_feat.dat', conv_test_feat) # load_array(path+'results/conv_test_feat.dat')
######!!!
conv_test_feat = load_array(path+'results/conv_test_feat.dat')
######!!!

# ### Pre-computed data augmentation + dropout
# We'll use our usual data augmentation parameters:

gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)
da_batches = get_batches(path+'train', gen_t, batch_size=batch_size, shuffle=False)

# We use those to create a dataset of convolutional features 5x bigger than the training set.
###!!!da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*5)
###!!!save_array(path+'results/da_conv_feat2.dat', da_conv_feat)      # da_conv_feat = load_array(path+'results/da_conv_feat2.dat')
######!!!
###da_conv_feat = conv_model.predict_generator(da_batches, da_batches.nb_sample*2)
###save_array(path+'results/da_conv_feat2a.dat', da_conv_feat)      # da_conv_feat = load_array(path+'results/da_conv_feat2a.dat')
da_conv_feat = load_array(path+'results/da_conv_feat2a.dat')
######!!!

# Let's include the real training data as well in its non-augmented form.
da_conv_feat = np.concatenate([da_conv_feat, conv_feat])

# Since we've now got a dataset 6x bigger than before, we'll need to copy our labels 6 times too.
###!!!da_trn_labels = np.concatenate([trn_labels]*6)
######!!!
da_trn_labels = np.concatenate([trn_labels]*3)
######!!!

# Based on some experiments the previous model works well, with bigger dense layers.
def get_bn_da_layers(bn, dense):
    return [
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dropout(bn),
        Dense(dense, activation='relu'),
        BatchNormalization(),
        Dropout(bn),
        Dense(dense, activation='relu'),
        BatchNormalization(),
        Dropout(bn),
        Dense(10, activation='softmax')
        ]

#--------------------------------------------------------------
def fitx(bn, dense, lr, epochs):
    print("Fitting: ", bn, dense, lr, epochs)
    x_model = Sequential(get_bn_da_layers(bn, dense))
    x_model.compile(Adam(lr=lr), loss='categorical_crossentropy', metrics=['accuracy'])
    x_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=epochs, validation_data=(conv_val_feat, val_labels))
    return x_model
#--------------------------------------------------------------

bn_model = fitx(0.6, 640, 0.000004, 12)

_="""
bn_model = Sequential(get_bn_da_layers(0.8, 256))
bn_model.compile(Adam(lr=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Now we can train the model as usual, with pre-computed augmented data.
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=1, validation_data=(conv_val_feat, val_labels))

bn_model.optimizer.lr=0.01
bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))

bn_model.optimizer.lr=0.0001

bn_model.fit(da_conv_feat, da_trn_labels, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))
"""

# Looks good - let's save those weights.
bn_model.save_weights(path+'models/da_conv8_1.h5')

_="""
# ### Pseudo labeling
# We're going to try using a combination of [pseudo labeling](http://deeplearning.net/wp-content/uploads/2013/03/pseudo_label_final.pdf) and [knowledge distillation](https://arxiv.org/abs/1503.02531) to allow us to use unlabeled data (i.e. do semi-supervised learning). For our initial experiment we'll use the validation set as the unlabeled data, so that we can see that it is working without using the test set. At a later date we'll try using the test set.
# To do this, we simply calculate the predictions of our model...

val_pseudo = bn_model.predict (conv_val_feat, batch_size=batch_size)

# ...concatenate them with our training labels...
comb_pseudo = np.concatenate ([da_trn_labels, val_pseudo])
comb_feat   = np.concatenate ([da_conv_feat, conv_val_feat])

# ...and fine-tune our model using that data.
bn_model.load_weights(path+'models/da_conv8_1.h5')
bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=1, validation_data=(conv_val_feat, val_labels))

bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))

bn_model.optimizer.lr=0.00001

bn_model.fit(comb_feat, comb_pseudo, batch_size=batch_size, nb_epoch=4, validation_data=(conv_val_feat, val_labels))

# That's a distinct improvement - even although the validation set isn't very big. This looks encouraging for when we try this on the test set.
bn_model.save_weights(path+'models/bn-ps8.h5')
"""

# ### Submit

# We'll find a good clipping amount using the validation set, prior to submitting.
def do_clip(arr, mx):
    return np.clip(arr, (1-mx)/9, mx)

###########################
# Trying to come up with missing 'val_preds'
val_preds = bn_model.predict(conv_val_feat, batch_size=batch_size)
###########################

x = keras.metrics.categorical_crossentropy(val_labels, do_clip(val_preds, 0.93)).eval()

# Should be loaded already
# conv_test_feat = load_array(path+'results/conv_test_feat.dat')
pass

preds = bn_model.predict(conv_test_feat, batch_size=batch_size*2)

# preds # .shape = (46, 10)
# array([[ 0.089 ,  0.264 ,  0.058 ,  0.151 ,  0.0669,  0.0292,  0.1107,  0.0357,  0.0402,  0.1553],
#    [ 0.0349,  0.1944,  0.0846,  0.0641,  0.0468,  0.0275,  0.4069,  0.0507,  0.0507,  0.0395],

subm = do_clip(preds,0.93) # subm - same as 'preds' above

subm_name = path+'results/subm.gz'

batches = get_batches(path+'train', batch_size=batch_size)
classes = sorted(batches.class_indices, key=batches.class_indices.get)

submission = pd.DataFrame(subm, columns=classes)
submission.insert(0, 'img', [a[4:] for a in test_filenames])
submission.head()

submission.to_csv(subm_name, index=False, compression='gzip')

# FileLink(subm_name)