Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- import time, os, fnmatch, cv2
- import numpy as np
- from keras.models import Sequential, load_model
- from keras.optimizers import SGD
- from keras.layers import Input, Dense, Conv2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation
- from sklearn.metrics import log_loss
- import keras
- from keras import backend as K
- from sklearn.model_selection import StratifiedKFold
- from keras.utils import np_utils
- from keras import regularizers
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier as KNN
- from sklearn.metrics import accuracy_score, classification_report
- from keras.layers import Activation, Dense ##
- from keras.layers.advanced_activations import LeakyReLU
# ---- global configuration ----
image_size = 128  # images are resized to image_size x image_size before use
image_size_squared = image_size**2  # flattened input dimension for the dense network
num_classes = 2  # binary quality classification
training = False  # run the fine-tuning stage in __main__
testing = True  # run the stratified k-fold evaluation stage in __main__
num_splits = 10  # number of folds for StratifiedKFold
# ---- training constants ----
batch_size = 256
nb_epoch = 10
test_p = 0.20 # deprecated
finetuned_path = 'custom_models/bois_newShallow-F65.h5'  # model loaded for fine-tuning/testing
finetuned_path_save = 'custom_models/bois_newShallow-F65.h5'  # model saved after training (same path)
K.set_image_data_format('channels_last') # added
# https://medium.com/tebs-lab/how-to-classify-mnist-digits-with-different-neural-network-architectures-39c75a0f03e3
- # aux functions
def image_process(img_path, img_size, process=None):
    """Load an image from disk, resize it and optionally preprocess it.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.
    img_size : int
        Target side length; the image is resized to (img_size, img_size).
    process : str or None
        Substring-matched processing flags, applied in this order:
        'contrast' (histogram-equalize the HSV value channel),
        'negative' (invert intensities), 'gray' (convert to grayscale),
        'blur' (Gaussian blur, sigma=1.5). None skips all processing.

    Returns
    -------
    numpy.ndarray
        float32 pixel data scaled to [0, 1].

    Raises
    ------
    FileNotFoundError
        If the image cannot be read.
    """
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread silently returns None on a missing/corrupt file; fail
        # loudly here instead of crashing opaquely inside cv2.resize.
        raise FileNotFoundError('Could not read image: %s' % img_path)
    img = cv2.resize(img, (img_size, img_size))
    if process is None:  # 'is None' instead of '== None'
        return np.float32(img)/255.0
    if 'contrast' in process:
        # Equalize only the V channel so hue/saturation are preserved.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        img[:,:,2] = cv2.equalizeHist(img[:,:,2])
        img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
    if 'negative' in process:
        img = 255-img
    if 'gray' in process:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    if 'blur' in process:
        img = cv2.GaussianBlur(img, (0, 0), 1.5)
    return np.float32(img)/255.0
def load_data(data_folder='_test/qualidade-redo/', img_size=image_size, process='contrast_gray', show_time=True):
    """Recursively load every *.jpg under data_folder.

    The label of each image is the name of its immediate parent
    directory (adjust here if the directory layout changes).

    Parameters
    ----------
    data_folder : str
        Root directory to walk.
    img_size : int
        Side length passed to image_process.
    process : str or None
        Processing flags passed to image_process.
    show_time : bool
        If True, print total and per-image loading time.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Image array X and label array y, in discovery order.
    """
    X = []
    y = []
    t0 = time.time()
    n = 0
    for root, dirnames, filenames in os.walk(data_folder):
        print(root)
        for fname in fnmatch.filter(filenames, '*.[Jj][Pp][Gg]'):  # case insensitive
            img_path = os.path.realpath(os.path.join(root, fname))
            # Label = immediate parent directory. os.path is used instead of
            # split('/') so this also works with Windows path separators.
            lbl = os.path.basename(os.path.dirname(img_path)).strip()
            X.append(image_process(img_path, img_size, process))
            y.append(lbl)
            n += 1
    if show_time:
        t = time.time()-t0
        print('Time to load "%s": %.2f seconds'%(data_folder, t))
        if n:  # avoid ZeroDivisionError when the folder contains no images
            print('\tMean time per image: %.4f seconds'%(t/n))
    return np.array(X), np.array(y)
def shallow_model():
    """Build and compile a small fully connected 2-class classifier.

    Architecture: flattened image vector -> Dense(32, relu) ->
    Dense(16, relu) -> Dense(2, softmax), with L2 weight penalties on
    both hidden layers. Compiled with Adam and binary cross-entropy.
    """
    net = Sequential()
    net.add(Dense(32,
                  kernel_regularizer=regularizers.l2(0.001),
                  input_dim=image_size_squared,
                  activation='relu'))
    net.add(Dense(16,
                  kernel_regularizer=regularizers.l2(0.001),
                  activation='relu'))
    net.add(Dense(2, activation='softmax'))
    net.compile(loss='binary_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])
    return net
def classify(X, y, X_test, y_test, classifier, show_time=False):
    """Fit a classifier on (X, y) and predict labels for X_test.

    Parameters
    ----------
    X, y : array-like
        Training features and labels.
    X_test, y_test : array-like
        Test features and labels.
    classifier : str
        'knn' for 1-nearest-neighbor; anything else selects a
        one-vs-rest SVC.
    show_time : bool
        Controls both logging and the return value (historical
        behavior, kept for existing callers).

    Returns
    -------
    With show_time=False: the raw predictions for X_test.
    With show_time=True: timings are printed/logged and the accuracy
    score is returned instead.
    """
    # Function-scope import: SVC was referenced but never imported at the
    # top of the file, so the non-'knn' branch raised NameError.
    from sklearn.svm import SVC
    t0 = time.time()
    clf = KNN(n_neighbors=1, n_jobs=-1) if classifier=='knn' else SVC(decision_function_shape='ovr')
    clf.fit(X, y)
    t1 = time.time()
    pred = clf.predict(X_test)
    if show_time:
        t2 = time.time()
        print('Classification time: %.4f seconds'%(t1-t0))
        with open('logs/cnn_results.log', 'a+') as f:
            f.write('\nFit time: '+str(t1-t0))
            f.write('\nClassification time: '+str(t2-t1))
    else:
        return pred
    acc = accuracy_score(y_test, pred)
    print('\tAccuracy: ', acc)
    return acc
def load_append(vecX, vecY, data_folder):
    """Load all images under data_folder and append them to (vecX, vecY).

    Returns the extended (X, y) pair as new concatenated arrays; the
    inputs are not modified in place.
    """
    extraX, extraY = load_data(data_folder)
    return (np.concatenate((vecX, extraX)),
            np.concatenate((vecY, extraY)))
- '''
- A~ train: Puruna3, 1/2 USP (animais 1-27)
- valid: Puruna1, 1/2 USP (animais 28-51)
- test: Puruna2, Jersey
- A-30 0.54 A-35 0.55 A-65 0.54. valid 0.90+, A-30 0.93 (nao conseguiu representar Jersey?)
- '''
- '''
- B~ train: Puruna3, Jersey
- valid: Puruna2, 1/2 USP
- test: Puruna1, 1/2 USP
- B-30 0.80 B-50 0.75 (overfit). valid 0.85
- '''
- '''
- C~ train: Puruna3, 1/2 Jersey, 1/2 USP
- valid: Puruna2, 1/2 Jersey
- test: Puruna1, 1/2 USP
- C-30 0.86 C-60 0.85 C-90 0.83
- '''
- '''
- D~ train: Puruna3, Jersey3, USP3
- valid: Puruna2, Jersey2, USP2
- test: Puruna1, Jersey1, USP1
- D-30 0.76 D-45 0.75 D-60 0.76
- '''
- '''
- E~ train: Puruna3, Jersey2, Jersey3, USP2, USP3
- valid: Puruna2, Jersey1
- test: Puruna1, USP1
- E-15 0.84 E-20 0.84 E-25 0.84 E-35 0.83 E-45 0.83 E-55 0.82 E-69 0.81
- '''
- '''
- F~ train: Puruna3, Puruna2, Jersey2, USP2
- valid: Puruna1-1, Jersey3, USP3
- test: Puruna1-2, Jersey2, USP2
- F-15 0.88 F-30 0.89 F-45 0.91 F-55 0.91 F-65 0.91
- '''
if __name__ == '__main__':
    if training:
        # Build training and validation sets (see the experiment notes
        # above; the 'F65' model path suggests scenario F — confirm).
        X_train, Y_train = load_data(data_folder='_test/qualidade-newsample/Puruna3')
        X_train, Y_train = load_append(X_train, Y_train, data_folder='_test/qualidade-newsample/Puruna2')
        X_train, Y_train = load_append(X_train, Y_train, data_folder='_test/qualidade-newsample/Jersey2')
        X_train, Y_train = load_append(X_train, Y_train, data_folder='_test/qualidade-newsample/USP2')
        X_valid, Y_valid = load_data(data_folder='_test/qualidade-newsample/Puruna1-1')
        X_valid, Y_valid = load_append(X_valid, Y_valid, '_test/qualidade-newsample/Jersey3')
        X_valid, Y_valid = load_append(X_valid, Y_valid, '_test/qualidade-newsample/USP3')
        # Flatten images into vectors for the dense network.
        X_train = X_train.reshape(X_train.shape[0], image_size_squared)
        X_valid = X_valid.reshape(X_valid.shape[0], image_size_squared)
        # One-hot encode labels (Keras-compatible format).
        Y_train = keras.utils.to_categorical(Y_train, num_classes)
        Y_valid = keras.utils.to_categorical(Y_valid, num_classes)
        # Resume fine-tuning if a fine-tuned model already exists,
        # otherwise start from a fresh model.
        if not os.path.exists(finetuned_path):
            model = shallow_model()
        else:
            model = load_model(finetuned_path)
        # Start fine-tuning. Keras 2 renamed the 'nb_epoch' keyword to
        # 'epochs'; the old name raises TypeError with the API used here.
        model.fit(X_train, Y_train,
                  batch_size=batch_size,
                  epochs=nb_epoch,
                  shuffle=True,
                  verbose=1,
                  validation_data=(X_valid, Y_valid),
                  )
        model.save(finetuned_path_save)
    if testing:  # evaluate the model with num_splits-fold stratified CV
        sumtotal = 0
        sumcount = 0
        model = load_model(finetuned_path)
        skf = StratifiedKFold(num_splits)
        # NOTE(review): Jersey2 and USP2 are also part of the training set
        # above — possible train/test leakage; confirm this is intentional.
        X_test_OG, Y_test_OG = load_data(data_folder='_test/qualidade-newsample/Puruna1-2')
        X_test_OG, Y_test_OG = load_append(X_test_OG, Y_test_OG, data_folder='_test/qualidade-newsample/Jersey2')
        X_test_OG, Y_test_OG = load_append(X_test_OG, Y_test_OG, data_folder='_test/qualidade-newsample/USP2')
        # Hoisted out of the fold loop: neither the reshape nor the
        # one-hot encoding depends on the fold indices.
        X_test = X_test_OG.reshape(X_test_OG.shape[0], image_size_squared)
        Y_test = keras.utils.to_categorical(Y_test_OG, num_classes)
        for train_index, test_index in skf.split(X_test_OG, Y_test_OG):
            X, X_t = X_test[train_index], X_test[test_index]
            y, y_t = Y_test[train_index], Y_test[test_index]
            print('Train Feature Extraction...')
            t0 = time.time()
            # The network's outputs are used as feature vectors for KNN.
            X = model.predict(x=X, batch_size=1)
            t1 = time.time()
            print('Train feature extraction finished in %f seconds.'%(t1-t0))
            X_t = model.predict(x=X_t, batch_size=1)
            t2 = time.time()
            print('Test feature extraction finished in %f seconds.'%(t2-t1))
            y_pred = classify(X, y, X_t, y_t, 'knn', False)
            accscore = accuracy_score(y_t, y_pred)
            sumtotal += len(y_t)
            ntested = round(len(y_t)*accscore)
            sumcount += ntested
            print(classification_report(y_t, y_pred))
            print('Acc:', accscore, len(y_t), len(y_t)*accscore)
        # Aggregate accuracy over all folds.
        print('End:', sumcount, sumtotal, sumcount/sumtotal)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement