  1. """ TODO:
  2. - n_timesteps for a visit is target value
  3. """

import tensorflow as tf
from data_loader.tf_io import parse_features
from utils.helpers import sequence_length
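
# NOTE: `sequence_length` is imported from utils.helpers, which is not part
# of this paste. Judging from how its first return value is consumed in
# model() below (cast to bool and used to mask a (BATCH_SIZE, n_steps) loss),
# it presumably derives per-example lengths from zero padding, roughly like
# this hypothetical sketch (not the actual implementation):
def _sequence_length_sketch(x):
    # x: (batch, n_steps, n_features); a step counts as used if any feature != 0
    used = tf.sign(tf.reduce_max(tf.abs(x), axis=2))
    length = tf.cast(tf.reduce_sum(used, axis=1), tf.int32)
    return used, length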

DATA_FILE = 'mockdata_is_test=False_conv=False.txt'
DATA_FILE_TEST = 'mockdata_is_test=False_conv=False-test.txt'
#DATA_FILE = 'raw_features_granularity_24m_period_720h_1490195909.861038_1000_is_test=False_conv=False'

if 'mockdata' in DATA_FILE:
    OUTPUT_FILE_DIR = ''
else:
    OUTPUT_FILE_DIR = '\\\\RMAPPS0118\\_Faelles\\UDV_EPJSTRUKTURERING\\mi104f17_data/'
DATA_FILE = OUTPUT_FILE_DIR + DATA_FILE

OUTPUT_LOG_DIR = OUTPUT_FILE_DIR + 'logs/'

# Data sizes
DATA_SIZE = 1002

# Training parameters
learning_rate = 0.00001
display_step = 50
n_epochs = 15
BATCH_SIZE = 15  # TODO: experiment, maybe 100-200

# Network parameters
n_steps = 10  # VISIT_MAX_DURATION_IN_HOURS / SEQUENCE_LENGTH_IN_HOURS
n_classes = 1  # regression
n_features = 10  # VISIT_MAX_DURATION_IN_HOURS * 60 / SEQUENCE_LENGTH_IN_HOURS ????????????
DIM_EMB = 200
DIM_HIDDEN = 200

# Weights are defined inside model() below.

# TODO: https://www.youtube.com/watch?v=JYqjcHYTQgQ
def RNN(emb, seq_len):
    """
    :param emb: embedded input with dimensions BATCH_SIZE x n_steps x DIM_EMB
    :param seq_len: true (unpadded) length of each sequence in the batch
    :return: outputs for each time step
    """
    lstm_cell = tf.contrib.rnn.LSTMCell(DIM_HIDDEN)
    # TODO: States?
    outputs, states = tf.nn.dynamic_rnn(
        lstm_cell, emb, dtype=tf.float32, sequence_length=seq_len)

    # Return the outputs for each time step: (BATCH_SIZE, n_steps, DIM_HIDDEN)
    return outputs
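
# Note on the "States?" TODO above: with `sequence_length` set,
# tf.nn.dynamic_rnn copies zeros into `outputs` past each sequence's true
# length, and `states` holds the final LSTMStateTuple (c, h) from the last
# valid time step. `states` is unused here because only the per-step outputs
# feed the prediction layer.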

def model(x, y, seq_len):

    used, seq_len1 = sequence_length(x)

    # Transpose our matrix: (n_steps, BATCH_SIZE, n_features)
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * BATCH_SIZE, n_features); -1 infers that dimension
    x = tf.reshape(x, [-1, n_features])

    weight_emb = tf.get_variable(name='weight_emb', shape=[n_features, DIM_EMB],
                                 initializer=tf.random_normal_initializer())
    weight_out = tf.get_variable(name='weight_out', shape=[DIM_HIDDEN, n_classes],
                                 initializer=tf.random_normal_initializer())
    bias_emb = tf.get_variable(name='bias_emb', initializer=tf.constant(0.1, shape=[DIM_EMB]))
    bias_out = tf.get_variable(name='bias_out', initializer=tf.constant(0.1, shape=[n_classes]))

    # Embedding transformation
    # emb: (n_steps * BATCH_SIZE, DIM_EMB)
    emb = tf.nn.relu(tf.matmul(x, weight_emb) + bias_emb)

    # Split and stack to get the embedding into shape:
    # (n_steps, BATCH_SIZE, DIM_EMB)
    emb = tf.split(emb, n_steps, 0)
    emb = tf.stack(emb)

    # Transpose emb matrix: (BATCH_SIZE, n_steps, DIM_EMB)
    emb = tf.transpose(emb, [1, 0, 2])
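
    # Equivalent shorter form: skip the initial transpose, reshape the raw
    # (BATCH_SIZE, n_steps, n_features) input to (-1, n_features), apply the
    # embedding, and reshape the result straight back to
    # (BATCH_SIZE, n_steps, DIM_EMB).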

    # (BATCH_SIZE, n_steps, DIM_HIDDEN)
    rnn_outputs = RNN(emb, seq_len)

    # Get a matrix (BATCH_SIZE * n_steps, DIM_HIDDEN)
    rnn_outputs_reshape = tf.reshape(rnn_outputs, [-1, DIM_HIDDEN])

    # Get net input for prediction
    outputs_net = tf.matmul(rnn_outputs_reshape, weight_out) + bias_out

    # Reshape the matrix: (BATCH_SIZE, n_steps, n_classes)
    logits = tf.reshape(outputs_net, [BATCH_SIZE, n_steps, n_classes])

    # Final prediction for each time step of each sequence in the batch
    preds = tf.nn.relu(logits)

    # Create a boolean mask from the labels (True = not padded)
    mask = tf.sign(tf.reduce_max(tf.abs(y), reduction_indices=1))  # was for checking; unused, TODO
    mask_bool = tf.cast(used, tf.bool)

    # Get the loss for every time step (including the padded time steps)
    full_loss = tf.squared_difference(tf.squeeze(preds, 2), tf.cast(y, tf.float32))

    # Only get the cost for the time steps that are not padded
    cost = tf.reduce_mean(tf.boolean_mask(full_loss, mask_bool))

    # Summary for the cost
    tf.summary.scalar('cost', cost)

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)

    return {'optimizer': optimizer,
            'cost': cost}
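
# Example of the masking above: for a batch of two sequences with true
# lengths [3, 1] and n_steps = 4, mask_bool is
#     [[True, True, True, False],
#      [True, False, False, False]]
# so tf.boolean_mask(full_loss, mask_bool) keeps only the four real
# time-step losses and the mean ignores the padded steps entirely.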

def queue_func(filename):
    # Create queues for data loading
    queue = tf.train.string_input_producer(
        [filename], num_epochs=n_epochs, shuffle=True)

    # Parse features from queue
    features = parse_features(
        queue=queue,
        max_seq_len=n_steps,
        feature_size=n_features,
        batch_size=BATCH_SIZE)

    dense_features = tf.sparse_tensor_to_dense(features['features'])
    labels = features['label']

    labels = tf.tile(labels, [1, tf.shape(dense_features)[1]])

    return dense_features, labels, tf.squeeze(features['seq_len'], 1)
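
# parse_features comes from data_loader.tf_io, which is not included in this
# paste. From its usage here it is assumed to return a dict with a sparse
# 'features' tensor of dense shape (batch, max_seq_len, feature_size), a
# 'label' of shape (batch, 1), and a 'seq_len' of shape (batch, 1); the
# tf.tile call above repeats the single per-visit label across every time
# step so the loss can be computed per step.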

# Save and restore
#saver = tf.train.Saver(max_to_keep=n_epochs)

# Define the graph twice with shared weights; the difference is that one
# instance reads from the train queue and the other from the test queue.
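# tf.make_template in main() handles this sharing: the first call to the
# wrapped model() creates the variables and every subsequent call reuses
# them, so the train and test graphs operate on the same weights.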

def main():
    print("Reading from: {0}".format(DATA_FILE))
    with tf.Session() as sess:
        x_train, y_train, seq_len_train = queue_func(DATA_FILE)
        x_test, y_test, seq_len_test = queue_func(DATA_FILE_TEST)

        graph_template = tf.make_template("", model)

        graph_train = graph_template(x_train, y_train, seq_len_train)
        graph_test = graph_template(x_test, y_test, seq_len_test)

        # Merge all summaries into a single operator
        merged_summary_op = tf.summary.merge_all(key=tf.GraphKeys.SUMMARIES)

        # Instantiate a SummaryWriter to output summaries and the graph
        summary_writer = tf.summary.FileWriter(
            OUTPUT_LOG_DIR, graph=sess.graph)
        # Init
        sess.run([tf.local_variables_initializer(),
                  tf.global_variables_initializer()])
        # Start populating the filename queue
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)
        count = 0
        epoch = 1
        epoch_count = 0
        try:
            while not coord.should_stop():
                # Fetch data, x, y
                #batch_x, batch_y = sess.run([dense_features, labels])

                # squeeze removes dimensions of size 1 from the shape of a tensor
                # batch_y = sess.run(labels)
                # batch_y contains one target per sequence
                # batch_steps_y contains the same target for each time step in a sequence
                # tf.tile
                # batch_steps_y = [[y] * n_steps for y in batch_y]

                # Train
                # drop feed dict
                _, cost = sess.run([graph_train['optimizer'], graph_train['cost']])

                count += 1
                if count % 10 == 0:
                    #if DATA_SIZE - (epoch_count * BATCH_SIZE) < BATCH_SIZE:

                    # Save the remainder of the epoch, otherwise we will stop
                    # earlier after X number of epochs
                    print("Epoch {0} done! Saving model".format(str(epoch)))
                    # Calculate batch loss

                    cost = sess.run(graph_test['cost'])
                    print(cost)

  196. """
  197. summary, loss = sess.run([merged_summary_op, cost],
  198. feed_dict={x: batch_x,
  199. y: batch_steps_y})
  200. # Save a checkpoint
  201. summary_writer.add_summary(summary, count)
  202. saver.save(
  203. sess,
  204. save_path=OUTPUT_LOG_DIR + 'model-epoch-' + str(epoch)
  205. + '.ckpt')
  206. """
  207. #epoch += 1
  208. #epoch_count = 0
  209. """
  210. if count % display_step == 0:
  211. # Calculate batch loss
  212. summary, loss = sess.run([merged_summary_op, cost],
  213. feed_dict={x: batch_x,
  214. y: batch_steps_y})
  215. print("Epoch: {0}, Iter: {1}, Minibatch Cost: {2},".format(
  216. str(epoch), str(count), loss))
  217. # Save a checkpoint
  218. #summary_writer.add_summary(summary, count)
  219. #saver.save(
  220. # sess,
  221. # save_path=OUTPUT_LOG_DIR + 'model.ckpt',
  222. # global_step=count)
  223.  
  224. count += 1
  225. epoch_count += 1
  226. """

        except tf.errors.OutOfRangeError:
            print('Done Loading')
            #summary_writer.close()
        finally:
            coord.request_stop()
            coord.join(threads=threads)


if __name__ == "__main__":
    main()
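
# The FileWriter above writes the graph definition (and any summaries, once
# the commented-out add_summary calls are re-enabled) to OUTPUT_LOG_DIR, so
# training progress can be inspected with TensorBoard, e.g.:
#     tensorboard --logdir=logs/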