  1. """This file contains some basic model components"""
  2.  
  3. import tensorflow as tf
  4. from tensorflow.python.ops.rnn_cell import DropoutWrapper
  5. from tensorflow.python.ops import variable_scope as vs
  6. from tensorflow.python.ops import rnn_cell
  7. from operator import mul
  8.  
  9.  
class LSTMEncoder(object):
    """Encodes a token sequence with an LSTM and appends a trainable sentinel vector."""

    def __init__(self, hidden_size, keep_prob):
        """
        Inputs:
          hidden_size: int. Hidden size of the RNN
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
        """
        self.hidden_size = hidden_size  # e.g. 200
        self.keep_prob = keep_prob
        self.lstm = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_size, forget_bias=1.0)
        self.lstm = tf.nn.rnn_cell.DropoutWrapper(cell=self.lstm, input_keep_prob=self.keep_prob)
        # self.question_length = 30
        # self.context_length = 600

    def build_graph(self, inputs, masks, type):
        """
        Inputs:
          inputs: Tensor shape (batch_size, seq_len, embedding_size)
          masks: Tensor shape (batch_size, seq_len). 1s for real tokens, 0s for padding
          type: "question" or "context". Selects the projection and sentinel variable

        Returns:
          out: Tensor shape (batch_size, seq_len + 1, hidden_size). Encoding with the sentinel appended
        """
        with vs.variable_scope("LSTMEncoder"):
            input_lens = tf.reduce_sum(masks, reduction_indices=1)
            inputs_size = inputs.get_shape().as_list()
            inputs_temp = inputs

            # 1) get encoding from LSTM
            C_or_Q, _ = tf.nn.dynamic_rnn(self.lstm, inputs_temp, sequence_length=input_lens, dtype=tf.float32)

            if type == "question":
                # Question: calculate q_dash = tanh(W q + b) from the inputs
                q_dash = tf.layers.dense(inputs_temp, C_or_Q.get_shape()[2], activation=tf.tanh)
                inputs_temp = q_dash
                sentinel = tf.get_variable("sentinel_q", [1, self.hidden_size], initializer=tf.random_normal_initializer())
            else:
                # Context: use the LSTM outputs directly
                sentinel = tf.get_variable("sentinel_c", [1, self.hidden_size], initializer=tf.random_normal_initializer())  # (1, 200)
                inputs_temp = C_or_Q

            # reshape the sentinel and tile it across the batch
            sentinel = tf.reshape(sentinel, (1, 1, -1))  # (1, 1, 200)
            sentinel = tf.tile(sentinel, (tf.shape(inputs_temp)[0], 1, 1))  # (?, 1, 200)
            # add the sentinel at the end as an extra timestep
            out = tf.concat([inputs_temp, sentinel], 1)  # (?, 601, 200)

            # Apply dropout
            out = tf.nn.dropout(out, self.keep_prob)

            return out


class CoAttention(object):
    """Coattention layer between the context and question encodings."""

    def __init__(self, keep_prob, context_hidden_size, query_hidden_size):
        """
        Inputs:
          keep_prob: Tensor containing a single scalar that is the keep probability (for dropout)
          context_hidden_size: int. Size of the context hidden vectors
          query_hidden_size: int. Size of the question (query) hidden vectors
        """
        self.keep_prob = keep_prob
        self.context_hidden_size = context_hidden_size
        self.query_hidden_size = query_hidden_size

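    # The build_graph method below computes coattention between the context
    # encoding D = context_hiddens (?, 601, 200) and the question encoding
    # Q = question_hiddens (?, 31, 200), both of which already carry the
    # sentinel vector appended by LSTMEncoder:
    #
    #   L   = D Q^T              affinity between every context/question pair   (?, 601, 31)
    #   A_Q = softmax(L)         attention over question words                  (?, 601, 31)
    #   A_D = softmax(L^T)       attention over context words                   (?, 31, 601)
    #   C_Q = D^T A_Q            context summaries, one per question word       (?, 200, 31)
    #   C_D = [Q^T ; C_Q] A_D    question-aware summaries per context word      (?, 400, 601)
    #   U   = BiLSTM([D ; C_D^T])  final coattention encoding                   (?, 601, 400)
    #
    # The sentinel row is sliced off before the encoding is returned.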
    def build_graph(self, question_hiddens, context_mask, context_hiddens):
        """
        Inputs:
          question_hiddens: Tensor shape (batch_size, question_len + 1, hidden_size), e.g. (?, 31, 200)
          context_mask: Tensor shape (batch_size, context_len). 1s for real tokens, 0s for padding
          context_hiddens: Tensor shape (batch_size, context_len + 1, hidden_size), e.g. (?, 601, 200)

        Returns:
          out: Tensor shape (batch_size, context_len, 2 * query_hidden_size). Coattention encoding of the context
        """
        with vs.variable_scope('Coattention') as scope:
            question_length = tf.shape(question_hiddens)[1]
            context_length = tf.shape(context_hiddens)[1]
            keys_dim = tf.shape(context_hiddens)[2]

            Q_tranpose = tf.transpose(question_hiddens, perm=[0, 2, 1])  # (?, 200, 31)
            # Affinity matrix L = D Q^T (D = context_hiddens, Q = question_hiddens)
            L = tf.matmul(context_hiddens, Q_tranpose)  # (?, 601, 31)

            L_transpose = tf.transpose(L, perm=[0, 2, 1])  # (?, 31, 601)

            # Q_logits_mask = tf.expand_dims(question_hiddens, 1)
            # C_logits_mask = tf.expand_dims(context_hiddens, 1)  # shape (batch_size, 1, num_values)
            # _, attn_dist = masked_softmax(attn_logits, attn_logits_mask, 2)  # shape (batch_size, num_keys, num_values). take softmax over values
            #
            # # Use attention distribution to take weighted sum of values
            # output = tf.matmul(attn_dist, values)  # shape (batch_size, num_keys, value_vec_size)

            # softmax over the context dimension: attention over context words for each question word
            A_D = tf.map_fn(lambda x: tf.nn.softmax(x), L_transpose, dtype=tf.float32)  # (?, 31, 601)
            # softmax over the question dimension: attention over question words for each context word
            A_Q = tf.map_fn(lambda x: tf.nn.softmax(x), L, dtype=tf.float32)  # (?, 601, 31)

            # Context summaries, one per question word: C_Q = D^T A_Q
            C_Q = tf.matmul(tf.transpose(context_hiddens, perm=[0, 2, 1]), A_Q)  # (?, 200, 31)

            # Question-aware summaries per context word: C_D = [Q^T ; C_Q] A_D
            Q_concat_CQ = tf.concat([Q_tranpose, C_Q], axis=1)  # (?, 400, 31)
            C_D = tf.matmul(Q_concat_CQ, A_D)  # (?, 400, 601)

            # Concatenate each context hidden state with its coattention features
            CO_ATT = tf.concat([context_hiddens, tf.transpose(C_D, perm=[0, 2, 1])], axis=2)  # (?, 601, 600)

            with tf.variable_scope('Coatt_encoder'):
                # BiLSTM for coattention encoding
                cell_fw = tf.nn.rnn_cell.BasicLSTMCell(self.query_hidden_size, forget_bias=1.0)
                cell_fw = DropoutWrapper(cell_fw, input_keep_prob=self.keep_prob)
                cell_bw = tf.nn.rnn_cell.BasicLSTMCell(self.query_hidden_size, forget_bias=1.0)
                cell_bw = DropoutWrapper(cell_bw, input_keep_prob=self.keep_prob)
                input_lens = tf.reduce_sum(context_mask, reduction_indices=1)
                # +1 accounts for the appended sentinel timestep
                (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, CO_ATT,
                                                                      dtype=tf.float32, sequence_length=input_lens + 1)
                U_1 = tf.concat([fw_out, bw_out], axis=2)  # (?, 601, 400)

                dims = U_1.get_shape().as_list()
                # Remove the sentinel vector from the last row
                U_2 = tf.slice(U_1, [0, 0, 0], [tf.shape(U_1)[0], dims[1] - 1, dims[2]])
                U_2 = tf.reshape(U_2, [tf.shape(U_1)[0], dims[1] - 1, dims[2]])  # (?, 600, 400)

                U_3 = tf.nn.dropout(U_2, self.keep_prob)
                # Apply dropout
                out = tf.nn.dropout(U_3, self.keep_prob)  # (?, 600, 400)
                return out

def masked_softmax(logits, mask, dim):
    """
    Takes masked softmax over given dimension of logits.

    Inputs:
      logits: Tensor. We want to take softmax over dimension dim.
      mask: Tensor of same shape as logits.
        Has 1s where there's real data in logits, 0 where there's padding
      dim: int. dimension over which to take softmax

    Returns:
      masked_logits: Tensor same shape as logits.
        This is the same as logits, but with 1e30 subtracted
        (i.e. very large negative number) in the padding locations.
      prob_dist: Tensor same shape as logits.
        The result of taking softmax over masked_logits in given dimension.
        Should be 0 in padding locations.
        Should sum to 1 over given dimension.
    """
    exp_mask = (1 - tf.cast(mask, 'float')) * (-1e30)  # -large where there's padding, 0 elsewhere
    masked_logits = tf.add(logits, exp_mask)  # where there's padding, set logits to -large
    prob_dist = tf.nn.softmax(masked_logits, dim)
    return masked_logits, prob_dist
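

# ---------------------------------------------------------------------------
# Minimal smoke-test sketch (not part of the original graph code): it shows one
# way the components above could be wired together. The sequence lengths
# (30 question / 600 context), embedding size (100) and hidden size (200) are
# assumptions taken from the shape comments above. Separate encoder instances
# and "q"/"c" scopes are used here only to keep variable names distinct; the
# full model may instead share one encoder with variable reuse.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    q_len, c_len, emb_size, hidden_size = 30, 600, 100, 200
    keep_prob = tf.placeholder_with_default(1.0, shape=())

    q_embs = tf.placeholder(tf.float32, [None, q_len, emb_size])
    c_embs = tf.placeholder(tf.float32, [None, c_len, emb_size])
    q_mask = tf.placeholder(tf.int32, [None, q_len])
    c_mask = tf.placeholder(tf.int32, [None, c_len])

    with tf.variable_scope("q"):
        q_encoder = LSTMEncoder(hidden_size, keep_prob)
        question_hiddens = q_encoder.build_graph(q_embs, q_mask, "question")  # (?, 31, 200)
    with tf.variable_scope("c"):
        c_encoder = LSTMEncoder(hidden_size, keep_prob)
        context_hiddens = c_encoder.build_graph(c_embs, c_mask, "context")    # (?, 601, 200)

    coatt = CoAttention(keep_prob, hidden_size, hidden_size)
    coatt_out = coatt.build_graph(question_hiddens, c_mask, context_hiddens)  # (?, 600, 400)
    print(coatt_out.get_shape().as_list())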