Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import tensorflow as tf
- import numpy as np # dealing with arrays
- import time as time
- from datetime import timedelta
- from Gaussian import gauss_decay_step
# Paths to the raw training images for the cats/dogs classifier.
TRAIN_DIR_CATS = 'C:/Users/Alberto/Desktop/Coding/data/PetImages/Cat'
TRAIN_DIR_DOGS = 'C:/Users/Alberto/Desktop/Coding/data/PetImages/Dog'
IMG_SIZE = 50  # images are IMG_SIZE x IMG_SIZE, single channel (see input placeholder below)
NUM_CLASSES = 2  # cat vs. dog, one-hot encoded
# Learning rate is a TF variable (not a constant) so it can be reassigned
# at run time by the decay ops built in model().
LR = tf.get_variable(name = "learning_rate", dtype= tf.float32, shape=[])
BATCH_SIZE = 16
NUM_EPOCHS = 5 #number of times it runs through all the data for each model run
NUM_RUN = 0 #Keep track of how many models we've run
def conv2d(x, W, b, strides=1):
    """2-D convolution with bias, followed by a ReLU.

    x: input tensor; W: filter weights; b: bias vector;
    strides: spatial stride, identical in both directions ('SAME' padding).
    """
    conv = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(conv, b))
def conv2d_batch(x, W, b, strides=1):
    """2-D convolution with bias, batch normalization, then ReLU.

    Relies on the module-level `is_training` placeholder so batch norm can
    switch between batch statistics (training) and moving averages (inference).
    """
    conv = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    conv = tf.nn.bias_add(conv, b)
    # updates_collections=None forces the moving-average updates to run in place.
    normed = tf.contrib.layers.batch_norm(conv, decay=0.99, center=True, scale=True,
                                          updates_collections=None,
                                          is_training=is_training,
                                          reuse=None,
                                          trainable=True
                                          )
    return tf.nn.relu(normed)
def maxpool2d(x, k=2):
    """Downsample x with k x k max pooling (stride k, 'SAME' padding)."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
def new_weights(shape):
    """Create a trainable weight variable with truncated-normal init (stddev 0.05).

    shape: [height, width, in_channels, num_filters] for conv layers,
    [in_units, out_units] for the fully connected layer.
    """
    initial = tf.truncated_normal(shape, stddev=0.05)
    return tf.Variable(initial)
def new_biases(length):
    """Create a trainable bias vector of `length` entries, all initialized to 0.05."""
    initial = tf.constant(0.05, shape=[length])
    return tf.Variable(initial)
def forward_pass(input, reuse):
    """Run the plain (no batch-norm) convnet on `input` and return the logits.

    Uses the module-level `weights`/`biases` lists: every entry but the last
    is a conv layer (each followed by 2x2 max pooling); the last entry is the
    fully connected output layer.
    """
    with tf.variable_scope("convnet", reuse=reuse):
        net = input
        for w, b in zip(weights[:-1], biases[:-1]):
            net = maxpool2d(conv2d(net, w, b))
        net = tf.contrib.layers.flatten(net)
        return tf.add(tf.matmul(net, weights[-1]), biases[-1])
def forward_pass_batch(input, reuse):
    """Run the batch-normalized convnet on `input` and return the logits.

    Identical structure to forward_pass(), but each conv layer goes through
    conv2d_batch (conv + bias + batch norm + ReLU).
    """
    with tf.variable_scope("convnet", reuse=reuse):
        net = input
        for w, b in zip(weights[:-1], biases[:-1]):
            net = maxpool2d(conv2d_batch(net, w, b))
        net = tf.contrib.layers.flatten(net)
        return tf.add(tf.matmul(net, weights[-1]), biases[-1])
# Conv filter banks ([h, w, in_ch, out_ch]) plus the final fully connected
# layer ([flattened_units, NUM_CLASSES]).  After five 2x2 max-pools a 50x50
# input shrinks to 2x2x256 = 1024 units, matching the FC input size.
weights = [
    new_weights([5,5,1,16]),
    new_weights([5,5,16,32]),
    new_weights([5,5,32,32]),
    new_weights([5,5,32,64]),
    new_weights([5,5,64,256]),
    new_weights([1024,NUM_CLASSES])
]
# One bias vector per layer, sized to that layer's output channels/units.
biases = [
    new_biases(16),
    new_biases(32),
    new_biases(32),
    new_biases(64),
    new_biases(256),
    new_biases(NUM_CLASSES)
]
# Per-layer learning-rate multipliers (one entry per layer, conv + FC).
rate_multiplier = tf.get_variable(name = "rate_multiplier", dtype= tf.float32, shape=[len(weights)])
# Scalar fed at run time with the gaussian-decay multiplier for one layer.
gaussian = tf.placeholder(name = "gaussian", dtype= tf.float32, shape=[])
#for debugging. Calculates the average of the absolute value of the gradients at each layer
def gradient_per_layer_list():
    """Build one mean(|gradient|) op per layer, returned as a list.

    All conv layers share `weight_gradient_placeholder` (the actual gradient
    is fed in per layer at run time); the fully connected layer uses
    `fc_weight_gradient_placeholder`.
    """
    per_layer = [tf.reduce_mean(tf.abs(weight_gradient_placeholder))
                 for _ in range(len(weights) - 1)]
    per_layer.append(tf.reduce_mean(tf.abs(fc_weight_gradient_placeholder)))
    return per_layer
#returns list of parameter update operations for weights and biases [w[0],b[0]...w[i],b[i]]
def update_operations():
    """Build manual SGD update ops for every weight and bias.

    Returns ops ordered [w0, b0, w1, b1, ..., fc_w, fc_b].  Each update is
    param -= gradient * LR * rate_multiplier[layer]; the gradients arrive
    through the module-level placeholders when the ops are run.
    """
    update_ops = []
    with tf.name_scope('gradient-update'):
        #convolutional weights and biases
        for i in range(len(weights)-1):
            step = tf.multiply(LR, rate_multiplier[i])
            update_ops.append(weights[i].assign(tf.subtract(weights[i], tf.multiply(weight_gradient_placeholder, step))))
            update_ops.append(biases[i].assign(tf.subtract(biases[i], tf.multiply(bias_gradient_placeholder, step))))
        #fully connected weight and bias
        # BUG FIX: the original reused the stale loop variable i here, so the FC
        # layer was scaled by the last *conv* layer's multiplier instead of its own.
        fc_step = tf.multiply(LR, rate_multiplier[len(weights) - 1])
        update_ops.append(weights[-1].assign(tf.subtract(weights[-1], tf.multiply(fc_weight_gradient_placeholder, fc_step))))
        update_ops.append(biases[-1].assign(tf.subtract(biases[-1], tf.multiply(bias_gradient_placeholder, fc_step))))
    return update_ops
def multipliers_update():
    """Return one assign op per layer that writes the `gaussian` placeholder
    value into that layer's slot of `rate_multiplier`."""
    return [rate_multiplier[i].assign(gaussian) for i in range(len(weights))]
#Separate data into train, test, and validation set
# Each record is assumed to be (image, one-hot label) — TODO confirm against
# the script that produced train_data.npy.
data = np.load('train_data.npy')
train_set = data[4000:]
test_set = data[400:4000]
validation_set = data[:400]
#Separate training set into X and Y
X_train = np.array([i[0] for i in train_set]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y_train = np.array([i[1] for i in train_set])
X_test = np.array([i[0] for i in test_set]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y_test = np.array([i[1] for i in test_set])
# NOTE(review): .size is the total element count (N * 50 * 50), not the
# number of samples; X_train.shape[0] is probably what was meant here.
print("Training dataset size", X_train.size)
print("batch size: ", BATCH_SIZE )
# Create placeholders for our data
input = tf.placeholder(tf.float32, shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')
y_true = tf.placeholder(tf.int64, shape=[None, NUM_CLASSES], name='y_true')
# Gradients are computed in one sess.run and fed back through these
# placeholders so per-layer learning-rate multipliers can be applied.
weight_gradient_placeholder = tf.placeholder(tf.float32, shape=[None,None,None,None])
fc_weight_gradient_placeholder = tf.placeholder(tf.float32, shape=[None,None])
bias_gradient_placeholder = tf.placeholder(tf.float32, shape=[None])
is_training = tf.placeholder(tf.bool, name='is_training')
#Tensorboard: summary writers
# One FileWriter per model run, indexed by NUM_RUN.
test_writers = []
def model(X_train = X_train,Y_train = Y_train, X_test = X_test, Y_test = Y_test, learning_rate = .001,
          num_epochs = NUM_EPOCHS, minibatch_size = BATCH_SIZE,
          batchnorm = False,
          lr_decay_use = False, lr_decay = 1, exp_decay=False, decay_steps = 1,
          SD_MULT = None, REDUX = 0):
    '''
    Train the convnet once with manual (placeholder-fed) gradient descent.

    :param X_train: training images, shape (N, IMG_SIZE, IMG_SIZE, 1)
    :param Y_train: one-hot training labels, shape (N, NUM_CLASSES)
    :param X_test: test images
    :param Y_test: one-hot test labels
    :param learning_rate: initial value assigned to the LR variable
    :param num_epochs: how many times we run through the complete data set
    :param minibatch_size: how big each mini batch is
    :param batchnorm: use the batch-normalized forward pass when True
    :param lr_decay_use: set to True to run the decay op after every batch
    :param lr_decay: LR multiplier per batch (or base for exponential decay)
    :param exp_decay: True for exp decay: LR * lr_decay ** (global_step / decay_steps)
    :param decay_steps: decay horizon in epochs (converted to steps below)
    :param SD_MULT: std-dev multiplier for the gaussian per-layer rate
                    schedule; None disables the schedule
    :param REDUX: passed through to gauss_decay_step
    '''
    global NUM_RUN
    epoch_step = 0
    global_step = 0
    train_size = X_train.shape[0]
    # minibatches in every epoch
    # BUG FIX: batching now honors the minibatch_size parameter; the original
    # hard-coded the global BATCH_SIZE here and in the index arithmetic below.
    tot_minibatches = int(train_size / minibatch_size)
    decay_steps = decay_steps * tot_minibatches
    # Build the forward passes (train graph and reuse=True test graph)
    if batchnorm == False:
        pred_train = forward_pass(input, reuse = False)
        pred_test = forward_pass(input, reuse = True)
    else:
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            pred_train = forward_pass_batch(input, reuse=False)
            pred_test = forward_pass_batch(input, reuse=True)
    test_pred = tf.equal(tf.argmax(pred_test, 1), tf.argmax(y_true,1))
    test_accuracy = tf.reduce_mean(tf.cast(test_pred, tf.float32))
    test_summary = tf.summary.scalar('test_accuracy', test_accuracy)
    # Softmax cross-entropy loss on the training logits
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = pred_train, labels = y_true))
    gradient_per_layer = gradient_per_layer_list()
    # Gradients of the cost w.r.t. every weight then every bias, in that order
    parameter_gradients = tf.gradients(cost, [*weights, *biases])
    update_ops = update_operations()
    multipliers = multipliers_update()
    init = tf.global_variables_initializer()
    # Learning rate decay op: exponential or plain multiplicative.
    # NOTE(review): global_step is a Python int frozen at 0 when this op is
    # built, so the exponential branch always computes lr_decay ** 0 == 1;
    # a step placeholder/variable would be needed for true exponential decay.
    if (exp_decay == True):
        lr_decay_op = tf.multiply(LR, lr_decay ** (global_step/ decay_steps))
    else:
        lr_decay_op = tf.multiply(LR, lr_decay)
    # renamed from the original's `lr_decay = ...`, which shadowed the parameter
    lr_update = LR.assign(lr_decay_op)
    with tf.Session() as sess:
        sess.run(init)
        # Tensorboard: create exactly one file writer for this run so that
        # test_writers[NUM_RUN] indexing stays consistent.
        if batchnorm == True:
            test_writers.append(tf.summary.FileWriter('C:/Users/Alberto/Desktop/Coding/TensorboardLogs/test/run/batchrun' + str(NUM_RUN)))
        else:
            test_writers.append(tf.summary.FileWriter('C:/Users/Alberto/Desktop/Coding/TensorboardLogs/test/run'+str(NUM_RUN)))
        mergeall_op = tf.summary.merge_all()
        # Reset weights, biases and rate multipliers for a fresh run
        for i in range(len(weights)):
            sess.run(weights[i].assign(tf.truncated_normal(weights[i].shape, stddev=0.05)))
            sess.run(biases[i].assign(tf.constant(0.05, shape=biases[i].shape)))
            sess.run(rate_multiplier[i].assign(1))
        for writer in test_writers:
            writer.flush()  # BUG FIX: original referenced .flush without calling it
        # Initialize the learning rate for this run
        sess.run(LR.assign(learning_rate))
        start_time = time.time()
        # BUG FIX: loop bound is the num_epochs parameter; the original
        # compared against the global NUM_EPOCHS, ignoring the argument.
        while epoch_step < num_epochs: #Every epoch is a run through ALL of the data
            # Shuffle the data
            p = np.random.permutation(len(X_train))
            X_train, Y_train = X_train[p], Y_train[p]
            #Minibatch Gradient Descent
            for batch_step in range(tot_minibatches):
                index_start = batch_step * minibatch_size
                index_end = index_start + minibatch_size
                x_batch, y_true_batch = X_train[index_start:index_end], Y_train[index_start:index_end]
                # Set per-layer learning-rate multipliers from the gaussian decay schedule
                if SD_MULT != None:
                    multipliers_in = gauss_decay_step(NUM_LAYERS=len(weights), TOT_ITERATIONS=num_epochs*tot_minibatches, step= global_step, SD_MULT=SD_MULT, REDUX = REDUX)
                    for i in range(len(weights)):
                        sess.run(multipliers[i], {gaussian : multipliers_in[i]})
                # Compute all gradients once, then apply the manual updates
                cur_grad = sess.run(parameter_gradients, {input: x_batch, y_true: y_true_batch, is_training: True})
                # conv layers: cur_grad is ordered [w0..w_fc, b0..b_fc]
                for i in range(len(weights)-1):
                    sess.run(update_ops[i*2], {weight_gradient_placeholder: cur_grad[i]})
                    sess.run(update_ops[i*2+1], {bias_gradient_placeholder: cur_grad[len(weights)+ i]})
                #Now update fully connected layer params
                sess.run(update_ops[-2], {fc_weight_gradient_placeholder: cur_grad[len(weights)-1]})
                sess.run(update_ops[-1], {bias_gradient_placeholder: cur_grad[-1]})
                #Apply learning decay
                if(lr_decay_use == True):
                    sess.run(lr_update)
                # (Removed: a per-batch sess.run(mergeall_op) whose result was
                # never used — dead computation on every step.)
                # Print results and write test summaries every 50 steps
                if global_step % 50 == 0:
                    print("Step: ", global_step)
                    end_time = time.time()
                    time_dif = end_time - start_time
                    print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
                    start_time = time.time()
                    #Tensorboard: write data to tensorboard
                    summary = sess.run(mergeall_op, feed_dict={input: X_test,
                                                               y_true: Y_test, is_training: False})
                    test_writers[NUM_RUN].add_summary(summary, global_step)
                    test_writers[NUM_RUN].flush()  # BUG FIX: was missing ()
                # (Removed: manual batch_step += 1 — the for loop already advances it.)
                global_step += 1
            print("Epoch: " + str(epoch_step + 1))
            epoch_step += 1
        NUM_RUN = NUM_RUN + 1
    return
Add Comment
Please, Sign In to add comment