Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tensorflow as tf
- import numpy as np # dealing with arrays
- import time as time
- from datetime import timedelta
- from Gaussian import gauss_decay_step
# Paths to the raw training images for the cats/dogs classifier.
TRAIN_DIR_CATS = 'C:/Users/Alberto/Desktop/Coding/data/PetImages/Cat'
TRAIN_DIR_DOGS = 'C:/Users/Alberto/Desktop/Coding/data/PetImages/Dog'
IMG_SIZE = 50  # images are IMG_SIZE x IMG_SIZE, single channel (see input placeholder below)
NUM_CLASSES = 2  # cat vs. dog, one-hot encoded
# Learning rate is a TF variable (not a constant) so it can be reassigned
# at run time by the decay ops built in model().
LR = tf.get_variable(name = "learning_rate", dtype= tf.float32, shape=[])
BATCH_SIZE = 16
NUM_EPOCHS = 5 #number of times it runs through all the data for each model run
NUM_RUN = 0 #Keep track of how many models we've run
def conv2d(x, W, b, strides=1):
    """2-D convolution with bias, followed by a ReLU.

    x: input tensor; W: filter weights; b: bias vector;
    strides: spatial stride, identical in both directions ('SAME' padding).
    """
    conv = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, b))
def conv2d_batch(x, W, b, strides=1):
    """2-D convolution with bias, batch normalization, then ReLU.

    Relies on the module-level `is_training` placeholder so batch norm can
    switch between batch statistics (training) and moving averages (inference).
    """
    conv = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    conv = tf.nn.bias_add(conv, b)
    # updates_collections=None forces the moving-average updates to run in place.
    normed = tf.contrib.layers.batch_norm(conv, decay=0.99, center=True, scale=True,
                                          updates_collections=None,
                                          is_training=is_training,
                                          reuse=None,
                                          trainable=True
                                          )
    return tf.nn.relu(normed)
def maxpool2d(x, k=2):
    """Downsample x with k x k max pooling (stride k, 'SAME' padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
def new_weights(shape):
    """Create a trainable weight variable with truncated-normal init (stddev 0.05).

    shape: [height, width, in_channels, num_filters] for conv layers,
    [in_units, out_units] for the fully connected layer.
    """
    initial = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial)
def new_biases(length):
    """Create a trainable bias vector of `length` entries, all initialized to 0.05."""
    initial = tf.constant(0.05, shape=[length])
    return tf.Variable(initial)
def forward_pass(input, reuse):
    """Run the plain (no batch-norm) convnet on `input` and return the logits.

    Uses the module-level `weights`/`biases` lists: every entry but the last
    is a conv layer (each followed by 2x2 max pooling); the last entry is the
    fully connected output layer.
    """
    with tf.variable_scope("convnet", reuse=reuse):
        net = input
        for w, b in zip(weights[:-1], biases[:-1]):
            net = maxpool2d(conv2d(net, w, b))
        net = tf.contrib.layers.flatten(net)
        return tf.add(tf.matmul(net, weights[-1]), biases[-1])
def forward_pass_batch(input, reuse):
    """Run the batch-normalized convnet on `input` and return the logits.

    Identical structure to forward_pass(), but each conv layer goes through
    conv2d_batch (conv + bias + batch norm + ReLU).
    """
    with tf.variable_scope("convnet", reuse=reuse):
        net = input
        for w, b in zip(weights[:-1], biases[:-1]):
            net = maxpool2d(conv2d_batch(net, w, b))
        net = tf.contrib.layers.flatten(net)
        return tf.add(tf.matmul(net, weights[-1]), biases[-1])
# Conv filter banks ([h, w, in_ch, out_ch]) plus the final fully connected
# layer ([flattened_units, NUM_CLASSES]).  After five 2x2 max-pools a 50x50
# input shrinks to 2x2x256 = 1024 units, matching the FC input size.
weights = [
    new_weights([5,5,1,16]),
    new_weights([5,5,16,32]),
    new_weights([5,5,32,32]),
    new_weights([5,5,32,64]),
    new_weights([5,5,64,256]),
    new_weights([1024,NUM_CLASSES])
]
# One bias vector per layer, sized to that layer's output channels/units.
biases = [
    new_biases(16),
    new_biases(32),
    new_biases(32),
    new_biases(64),
    new_biases(256),
    new_biases(NUM_CLASSES)
]
# Per-layer learning-rate multipliers (one entry per layer, conv + FC).
rate_multiplier = tf.get_variable(name = "rate_multiplier", dtype= tf.float32, shape=[len(weights)])
# Scalar fed at run time with the gaussian-decay multiplier for one layer.
gaussian = tf.placeholder(name = "gaussian", dtype= tf.float32, shape=[])
#for debugging. Calculates the average of the absolute value of the gradients at each layer
def gradient_per_layer_list():
    """Build one mean(|gradient|) op per layer, returned as a list.

    All conv layers share `weight_gradient_placeholder` (the actual gradient
    is fed in per layer at run time); the fully connected layer uses
    `fc_weight_gradient_placeholder`.
    """
    per_layer = [tf.reduce_mean(tf.abs(weight_gradient_placeholder))
                 for _ in range(len(weights) - 1)]
    per_layer.append(tf.reduce_mean(tf.abs(fc_weight_gradient_placeholder)))
    return per_layer
#returns list of parameter update operations for weights and biases [w[0],b[0]...w[i],b[i]]
def update_operations():
    """Build manual SGD update ops for every weight and bias.

    Returns ops ordered [w0, b0, w1, b1, ..., fc_w, fc_b].  Each update is
    param -= gradient * LR * rate_multiplier[layer]; the gradients arrive
    through the module-level placeholders when the ops are run.
    """
    update_ops = []
    with tf.name_scope('gradient-update'):
        #convolutional weights and biases
        for i in range(len(weights)-1):
            step = tf.multiply(LR, rate_multiplier[i])
            update_ops.append(weights[i].assign(tf.subtract(weights[i], tf.multiply(weight_gradient_placeholder, step))))
            update_ops.append(biases[i].assign(tf.subtract(biases[i], tf.multiply(bias_gradient_placeholder, step))))
        #fully connected weight and bias
        # BUG FIX: the original reused the stale loop variable i here, so the FC
        # layer was scaled by the last *conv* layer's multiplier instead of its own.
        fc_step = tf.multiply(LR, rate_multiplier[len(weights) - 1])
        update_ops.append(weights[-1].assign(tf.subtract(weights[-1], tf.multiply(fc_weight_gradient_placeholder, fc_step))))
        update_ops.append(biases[-1].assign(tf.subtract(biases[-1], tf.multiply(bias_gradient_placeholder, fc_step))))
    return update_ops
def multipliers_update():
    """Return one assign op per layer that writes the `gaussian` placeholder
    value into that layer's slot of `rate_multiplier`."""
    return [rate_multiplier[i].assign(gaussian) for i in range(len(weights))]
#Separate data into train, test, and validation set
# Each record is assumed to be (image, one-hot label) — TODO confirm against
# the script that produced train_data.npy.
data = np.load('train_data.npy')
train_set = data[4000:]
test_set = data[400:4000]
validation_set = data[:400]
#Separate training set into X and Y
X_train = np.array([i[0] for i in train_set]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y_train = np.array([i[1] for i in train_set])
X_test = np.array([i[0] for i in test_set]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y_test = np.array([i[1] for i in test_set])
# NOTE(review): .size is the total element count (N * 50 * 50), not the
# number of samples; X_train.shape[0] is probably what was meant here.
print("Training dataset size", X_train.size)
print("batch size: ", BATCH_SIZE )
# Create placeholders for our data
input = tf.placeholder(tf.float32, shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')
y_true = tf.placeholder(tf.int64, shape=[None, NUM_CLASSES], name='y_true')
# Gradients are computed in one sess.run and fed back through these
# placeholders so per-layer learning-rate multipliers can be applied.
weight_gradient_placeholder = tf.placeholder(tf.float32, shape=[None,None,None,None])
fc_weight_gradient_placeholder = tf.placeholder(tf.float32, shape=[None,None])
bias_gradient_placeholder = tf.placeholder(tf.float32, shape=[None])
is_training = tf.placeholder(tf.bool, name='is_training')
#Tensorboard: summary writers
# One FileWriter per model run, indexed by NUM_RUN.
test_writers = []
def model(X_train = X_train,Y_train = Y_train, X_test = X_test, Y_test = Y_test, learning_rate = .001,
          num_epochs = NUM_EPOCHS, minibatch_size = BATCH_SIZE,
          batchnorm = False,
          lr_decay_use = False, lr_decay = 1, exp_decay=False, decay_steps = 1,
          SD_MULT = None, REDUX = 0):
    '''
    Train the convnet once with manual (placeholder-fed) gradient descent.

    :param X_train: training images, shape (N, IMG_SIZE, IMG_SIZE, 1)
    :param Y_train: one-hot training labels, shape (N, NUM_CLASSES)
    :param X_test: test images
    :param Y_test: one-hot test labels
    :param learning_rate: initial value assigned to the LR variable
    :param num_epochs: how many times we run through the complete data set
    :param minibatch_size: how big each mini batch is
    :param batchnorm: use the batch-normalized forward pass when True
    :param lr_decay_use: set to True to run the decay op after every batch
    :param lr_decay: LR multiplier per batch (or base for exponential decay)
    :param exp_decay: True for exp decay: LR * lr_decay ** (global_step / decay_steps)
    :param decay_steps: decay horizon in epochs (converted to steps below)
    :param SD_MULT: std-dev multiplier for the gaussian per-layer rate
                    schedule; None disables the schedule
    :param REDUX: passed through to gauss_decay_step
    '''
    global NUM_RUN
    epoch_step = 0
    global_step = 0
    train_size = X_train.shape[0]
    # minibatches in every epoch
    # BUG FIX: batching now honors the minibatch_size parameter; the original
    # hard-coded the global BATCH_SIZE here and in the index arithmetic below.
    tot_minibatches = int(train_size / minibatch_size)
    decay_steps = decay_steps * tot_minibatches
    # Build the forward passes (train graph and reuse=True test graph)
    if batchnorm == False:
        pred_train = forward_pass(input, reuse = False)
        pred_test = forward_pass(input, reuse = True)
    else:
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            pred_train = forward_pass_batch(input, reuse=False)
            pred_test = forward_pass_batch(input, reuse=True)
    test_pred = tf.equal(tf.argmax(pred_test, 1), tf.argmax(y_true,1))
    test_accuracy = tf.reduce_mean(tf.cast(test_pred, tf.float32))
    test_summary = tf.summary.scalar('test_accuracy', test_accuracy)
    # Softmax cross-entropy loss on the training logits
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred_train, labels = y_true))
    gradient_per_layer = gradient_per_layer_list()
    # Gradients of the cost w.r.t. every weight then every bias, in that order
    parameter_gradients = tf.gradients(cost, [*weights, *biases])
    update_ops = update_operations()
    multipliers = multipliers_update()
    init = tf.global_variables_initializer()
    # Learning rate decay op: exponential or plain multiplicative.
    # NOTE(review): global_step is a Python int frozen at 0 when this op is
    # built, so the exponential branch always computes lr_decay ** 0 == 1;
    # a step placeholder/variable would be needed for true exponential decay.
    if (exp_decay == True):
        lr_decay_op = tf.multiply(LR, lr_decay ** (global_step/ decay_steps))
    else:
        lr_decay_op = tf.multiply(LR, lr_decay)
    # renamed from the original's `lr_decay = ...`, which shadowed the parameter
    lr_update = LR.assign(lr_decay_op)
    with tf.Session() as sess:
        sess.run(init)
        # Tensorboard: create exactly one file writer for this run so that
        # test_writers[NUM_RUN] indexing stays consistent.
        if batchnorm == True:
            test_writers.append(tf.summary.FileWriter('C:/Users/Alberto/Desktop/Coding/TensorboardLogs/test/run/batchrun' + str(NUM_RUN)))
        else:
            test_writers.append(tf.summary.FileWriter('C:/Users/Alberto/Desktop/Coding/TensorboardLogs/test/run'+str(NUM_RUN)))
        mergeall_op = tf.summary.merge_all()
        # Reset weights, biases and rate multipliers for a fresh run
        for i in range(len(weights)):
            sess.run(weights[i].assign(tf.truncated_normal(weights[i].shape, stddev=0.05)))
            sess.run(biases[i].assign(tf.constant(0.05, shape=biases[i].shape)))
            sess.run(rate_multiplier[i].assign(1))
        for writer in test_writers:
            writer.flush()  # BUG FIX: original referenced .flush without calling it
        # Initialize the learning rate for this run
        sess.run(LR.assign(learning_rate))
        start_time = time.time()
        # BUG FIX: loop bound is the num_epochs parameter; the original
        # compared against the global NUM_EPOCHS, ignoring the argument.
        while epoch_step < num_epochs: #Every epoch is a run through ALL of the data
            # Shuffle the data
            p = np.random.permutation(len(X_train))
            X_train, Y_train = X_train[p], Y_train[p]
            #Minibatch Gradient Descent
            for batch_step in range(tot_minibatches):
                index_start = batch_step * minibatch_size
                index_end = index_start + minibatch_size
                x_batch, y_true_batch = X_train[index_start:index_end], Y_train[index_start:index_end]
                # Set per-layer learning-rate multipliers from the gaussian decay schedule
                if SD_MULT != None:
                    multipliers_in = gauss_decay_step(NUM_LAYERS=len(weights), TOT_ITERATIONS=num_epochs*tot_minibatches, step= global_step, SD_MULT=SD_MULT, REDUX = REDUX)
                    for i in range(len(weights)):
                        sess.run(multipliers[i], {gaussian : multipliers_in[i]})
                # Compute all gradients once, then apply the manual updates
                cur_grad = sess.run(parameter_gradients, {input: x_batch, y_true: y_true_batch, is_training: True})
                # conv layers: cur_grad is ordered [w0..w_fc, b0..b_fc]
                for i in range(len(weights)-1):
                    sess.run(update_ops[i*2], {weight_gradient_placeholder: cur_grad[i]})
                    sess.run(update_ops[i*2+1], {bias_gradient_placeholder: cur_grad[len(weights)+ i]})
                #Now update fully connected layer params
                sess.run(update_ops[-2], {fc_weight_gradient_placeholder: cur_grad[len(weights)-1]})
                sess.run(update_ops[-1], {bias_gradient_placeholder: cur_grad[-1]})
                #Apply learning decay
                if(lr_decay_use == True):
                    sess.run(lr_update)
                # (Removed: a per-batch sess.run(mergeall_op) whose result was
                # never used — dead computation on every step.)
                # Print results and write test summaries every 50 steps
                if global_step % 50 == 0:
                    print("Step: ", global_step)
                    end_time = time.time()
                    time_dif = end_time - start_time
                    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
                    start_time = time.time()
                    #Tensorboard: write data to tensorboard
                    summary = sess.run(mergeall_op, feed_dict={input: X_test,
                                                               y_true: Y_test, is_training: False})
                    test_writers[NUM_RUN].add_summary(summary, global_step)
                    test_writers[NUM_RUN].flush()  # BUG FIX: was missing ()
                # (Removed: manual batch_step += 1 — the for loop already advances it.)
                global_step += 1
            print("Epoch: " + str(epoch_step + 1))
            epoch_step += 1
        NUM_RUN = NUM_RUN + 1
    return
Add Comment
Please, Sign In to add comment