- """ TODO:
- - n_timesteps for a visit is target value
- """
- import tensorflow as tf
- from data_loader.tf_io import parse_features
- from utils.helpers import sequence_length
- DATA_FILE = 'mockdata_is_test=False_conv=False.txt'
- DATA_FILE_TEST = 'mockdata_is_test=False_conv=False-test.txt'
- #DATA_FILE = 'raw_features_granularity_24m_period_720h_1490195909.861038_1000_is_test=False_conv=False'
- if 'mockdata' in DATA_FILE:
- OUTPUT_FILE_DIR = ''
- else:
- OUTPUT_FILE_DIR = '\\\\RMAPPS0118\\_Faelles\\UDV_EPJSTRUKTURERING\\mi104f17_data/'
- DATA_FILE = OUTPUT_FILE_DIR + DATA_FILE
- OUTPUT_LOG_DIR = OUTPUT_FILE_DIR + 'logs/'
# Data sizes
DATA_SIZE = 1002

# Training parameters
learning_rate = 0.00001
display_step = 50
n_epochs = 15
BATCH_SIZE = 15  # TODO: experiment, maybe 100-200

# Network parameters
n_steps = 10  # VISIT_MAX_DURATION_IN_HOURS / SEQUENCE_LENGTH_IN_HOURS
n_classes = 1  # regression
n_features = 10  # VISIT_MAX_DURATION_IN_HOURS * 60 / SEQUENCE_LENGTH_IN_HOURS -- TODO: verify
DIM_EMB = 200
DIM_HIDDEN = 200

# Define weights
# TODO: https://www.youtube.com/watch?v=JYqjcHYTQgQ
def RNN(emb, seq_len):
    """
    :param emb: input with dimensions BATCH_SIZE x n_steps x DIM_EMB
    :param seq_len: true (unpadded) length of each sequence in the batch
    :return: outputs for each time step
    """
    lstm_cell = tf.contrib.rnn.LSTMCell(DIM_HIDDEN)
    # TODO: do we need the states?
    outputs, states = tf.nn.dynamic_rnn(
        lstm_cell, emb, dtype=tf.float32, sequence_length=seq_len)
    # Return the outputs for each time step: (BATCH_SIZE, n_steps, DIM_HIDDEN)
    return outputs
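
# Illustrative sketch only (never called; my addition, assuming TF 1.x):
# a quick shape check for RNN above. dynamic_rnn copies zeros into the
# outputs for time steps beyond each sequence's true length, which is
# why the cost in model() must mask out padded steps.
def _rnn_shape_demo():
    demo_emb = tf.zeros([BATCH_SIZE, n_steps, DIM_EMB])
    demo_len = tf.fill([BATCH_SIZE], n_steps)  # every sequence at full length
    demo_out = RNN(demo_emb, demo_len)
    print(demo_out.get_shape())  # (BATCH_SIZE, n_steps, DIM_HIDDEN) = (15, 10, 200)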
def model(x, y, seq_len):
    used, seq_len1 = sequence_length(x)
    # Transpose our matrix: (n_steps, BATCH_SIZE, n_features)
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * BATCH_SIZE, n_features);
    # -1 infers that dimension from the remaining shape
    x = tf.reshape(x, [-1, n_features])
    weight_emb = tf.get_variable(
        name='weight_emb', shape=[n_features, DIM_EMB],
        initializer=tf.random_normal_initializer())
    weight_out = tf.get_variable(
        name='weight_out', shape=[DIM_HIDDEN, n_classes],
        initializer=tf.random_normal_initializer())
    bias_emb = tf.get_variable(
        name='bias_emb', initializer=tf.constant(0.1, shape=[DIM_EMB]))
    bias_out = tf.get_variable(
        name='bias_out', initializer=tf.constant(0.1, shape=[n_classes]))
    # Embedding transformation
    # emb: (n_steps * BATCH_SIZE, DIM_EMB)
    emb = tf.nn.relu(tf.matmul(x, weight_emb) + bias_emb)
    # Split and stack to get the embedding into shape:
    # (n_steps, BATCH_SIZE, DIM_EMB)
    emb = tf.split(emb, n_steps, 0)
    emb = tf.stack(emb)
    # Transpose emb matrix: (BATCH_SIZE, n_steps, DIM_EMB)
    emb = tf.transpose(emb, [1, 0, 2])
    # (BATCH_SIZE, n_steps, DIM_HIDDEN)
    rnn_outputs = RNN(emb, seq_len)
    # Get a matrix (BATCH_SIZE * n_steps, DIM_HIDDEN)
    rnn_outputs_reshape = tf.reshape(rnn_outputs, [-1, DIM_HIDDEN])
    # Get net input for prediction
    outputs_net = tf.matmul(rnn_outputs_reshape, weight_out) + bias_out
    # Reshape the matrix: (BATCH_SIZE, n_steps, n_classes)
    logits = tf.reshape(outputs_net, [BATCH_SIZE, n_steps, n_classes])
    # Final prediction for each time step of each sequence of each batch
    preds = tf.nn.relu(logits)
    # Create a boolean mask from the labels (True = not padded)
    # TODO: check this; currently unused, mask_bool below is used instead
    mask = tf.sign(tf.reduce_max(tf.abs(y), reduction_indices=1))
    mask_bool = tf.cast(used, tf.bool)
    # Get the loss for every time step (including the padded time steps)
    full_loss = tf.squared_difference(tf.squeeze(preds, 2), tf.cast(y, tf.float32))
    # Only keep the cost for the time steps that are not padded
    cost = tf.reduce_mean(tf.boolean_mask(full_loss, mask_bool))
    # Summary for the cost
    tf.summary.scalar('cost', cost)
    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)
    return {'optimizer': optimizer,
            'cost': cost}
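
# Illustrative sketch only (never called; my addition): how the boolean mask
# in model() keeps only real time steps in the loss. Values are hypothetical;
# shapes mirror a (BATCH_SIZE, n_steps) loss matrix.
def _mask_demo():
    demo_loss = tf.constant([[1., 2., 3.],
                             [4., 5., 6.]])
    demo_mask = tf.constant([[True, True, False],    # sequence of length 2
                             [True, False, False]])  # sequence of length 1
    # tf.boolean_mask flattens to the kept losses: [1., 2., 4.]
    return tf.reduce_mean(tf.boolean_mask(demo_loss, demo_mask))  # (1+2+4)/3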
def queue_func(filename):
    # Create a queue for data loading
    queue = tf.train.string_input_producer(
        [filename], num_epochs=n_epochs, shuffle=True)
    # Parse features from the queue
    features = parse_features(
        queue=queue,
        max_seq_len=n_steps,
        feature_size=n_features,
        batch_size=BATCH_SIZE)
    dense_features = tf.sparse_tensor_to_dense(features['features'])
    labels = features['label']
    labels = tf.tile(labels, [1, tf.shape(dense_features)[1]])
    return dense_features, labels, tf.squeeze(features['seq_len'], 1)
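
# Illustrative sketch only (never called; my addition, assuming
# features['label'] has shape (BATCH_SIZE, 1)): the tf.tile in queue_func
# repeats each visit's single target across every time step, matching the
# per-step predictions of the RNN.
def _tile_demo():
    demo_labels = tf.constant([[7], [9]])   # one target per visit
    tiled = tf.tile(demo_labels, [1, 3])    # multiples [1, n_steps], n_steps=3 here
    # tiled == [[7, 7, 7], [9, 9, 9]], shape (BATCH_SIZE, n_steps)
    return tiled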
# Save and restore
# saver = tf.train.Saver(max_to_keep=n_epochs)

# Define the graph twice with shared weights; the only difference is that
# one instance reads from the training queue and the other from the test queue.
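
# Illustrative sketch only (never called; toy names are made up): the
# shared-weights trick below relies on tf.make_template, which wraps a
# function so every call reuses the same tf.get_variable variables.
def _template_demo():
    tmpl = tf.make_template('shared', lambda v: v * tf.get_variable('w', [1]))
    a = tmpl(tf.constant([1.0]))  # first call creates shared/w
    b = tmpl(tf.constant([2.0]))  # second call reuses shared/w
    return a, b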
def main():
    print("Reading from: {0}".format(DATA_FILE))
    with tf.Session() as sess:
        x_train, y_train, seq_len_train = queue_func(DATA_FILE)
        x_test, y_test, seq_len_test = queue_func(DATA_FILE_TEST)
        graph_template = tf.make_template("", model)
        graph_train = graph_template(x_train, y_train, seq_len_train)
        graph_test = graph_template(x_test, y_test, seq_len_test)
        # Merge all summaries into a single operator
        merged_summary_op = tf.summary.merge_all(key=tf.GraphKeys.SUMMARIES)
        # Instantiate a SummaryWriter to output summaries and the graph
        summary_writer = tf.summary.FileWriter(
            OUTPUT_LOG_DIR, graph=sess.graph)
        # Init
        sess.run([tf.local_variables_initializer(),
                  tf.global_variables_initializer()])
        # Start populating the filename queue
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        count = 0
        epoch = 1
        epoch_count = 0
        try:
            while not coord.should_stop():
                # Fetch data, x, y
                # batch_x, batch_y = sess.run([dense_features, labels])
                # (tf.squeeze removes dimensions of size 1 from a tensor's shape)
                # batch_y = sess.run(labels)
                # batch_y contains one target per sequence;
                # batch_steps_y contained the same target per time step,
                # now handled by tf.tile in queue_func:
                # batch_steps_y = [[y] * n_steps for y in batch_y]
                # Train; the queue pipeline feeds the graph, so no feed_dict
                _, cost = sess.run([graph_train['optimizer'], graph_train['cost']])
                count += 1
                if count % 10 == 0:
                    # if DATA_SIZE - (epoch_count * BATCH_SIZE) < BATCH_SIZE:
                    # Save the remainder of the epoch, otherwise we stop early
                    # after X number of epochs
                    print("Epoch {0} done! Saving model".format(str(epoch)))
                    # Calculate test batch loss
                    cost = sess.run(graph_test['cost'])
                    print(cost)
                    """
                    summary, loss = sess.run([merged_summary_op, cost],
                                             feed_dict={x: batch_x,
                                                        y: batch_steps_y})
                    # Save a checkpoint
                    summary_writer.add_summary(summary, count)
                    saver.save(
                        sess,
                        save_path=OUTPUT_LOG_DIR + 'model-epoch-' + str(epoch)
                        + '.ckpt')
                    """
                    # epoch += 1
                    # epoch_count = 0
                """
                if count % display_step == 0:
                    # Calculate batch loss
                    summary, loss = sess.run([merged_summary_op, cost],
                                             feed_dict={x: batch_x,
                                                        y: batch_steps_y})
                    print("Epoch: {0}, Iter: {1}, Minibatch Cost: {2}".format(
                        str(epoch), str(count), loss))
                    # Save a checkpoint
                    # summary_writer.add_summary(summary, count)
                    # saver.save(
                    #     sess,
                    #     save_path=OUTPUT_LOG_DIR + 'model.ckpt',
                    #     global_step=count)
                count += 1
                epoch_count += 1
                """
        except tf.errors.OutOfRangeError:
            print('Done loading')
            # summary_writer.close()
        finally:
            coord.request_stop()
            coord.join(threads=threads)


if __name__ == "__main__":
    main()