Advertisement
Guest User

Untitled

a guest
Feb 21st, 2019
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.78 KB | None | 0 0
  1. import numpy as np
  2. from tensorflow import keras as ks
  3.  
# Hyperparameters for the two-tower text-matching model below.
BATCH_SIZE = 32        # rows per training batch; the offers/items arrays below have 32 rows
NUM_DENSE_LAYERS = 1   # dense tanh layers appended after the LSTM stack
NUM_FEATURES = 256     # width of embeddings, LSTM units, and dense layers
NUM_LSTM_LAYERS = 2    # stacked LSTM layers in TextPipeline
VOCAB_SIZE = 262_144   # embedding table size (2**18 token ids)
  9.  
  10.  
  11. def l2norm(matrix):
  12. return ks.backend.sqrt(
  13. ks.backend.sum(
  14. ks.backend.square(matrix),
  15. axis=-1,
  16. keepdims=True,
  17. ),
  18. )
  19.  
  20.  
  21. class CosineSimilarity(ks.layers.Layer):
  22. def __init__(self, activation=None, **kwargs):
  23. self.activation = ks.activations.get(activation)
  24. super().__init__(trainable=False, **kwargs)
  25.  
  26. def call(self, inputs):
  27. left, right = (
  28. matrix / (l2norm(matrix) + ks.backend.epsilon())
  29. for matrix in inputs
  30. )
  31. output = ks.backend.dot(left, ks.backend.transpose(right))
  32. return self.activation(output)
  33.  
  34. def compute_output_shape(self, input_shape):
  35. shape_left, shape_right = input_shape
  36. return (shape_left[0], shape_right[0])
  37.  
  38.  
  39. class TextPipeline(ks.models.Sequential):
  40. def __init__(self):
  41. super().__init__()
  42. self.add(ks.layers.Embedding(
  43. input_dim=VOCAB_SIZE,
  44. output_dim=NUM_FEATURES,
  45. mask_zero=True,
  46. name='word_embeddings',
  47. ))
  48. for _ in range(NUM_LSTM_LAYERS - 1):
  49. self.add(ks.layers.LSTM(
  50. units=NUM_FEATURES,
  51. return_sequences=True,
  52. ))
  53. # self.add(ks.layers.BatchNormalization())
  54. self.add(ks.layers.LSTM(
  55. units=NUM_FEATURES,
  56. ))
  57. for _ in range(NUM_DENSE_LAYERS):
  58. # self.add(ks.layers.BatchNormalization())
  59. self.add(ks.layers.Dense(
  60. units=NUM_FEATURES,
  61. activation='tanh',
  62. ))
  63.  
  64.  
  65. class Brain(ks.models.Model):
  66. def __init__(self):
  67. offer_input = ks.Input(
  68. shape=(None,),
  69. dtype='int32',
  70. name='offer_input',
  71. )
  72. item_input = ks.Input(
  73. shape=(None,),
  74. dtype='int32',
  75. name='item_input',
  76. )
  77. text_pipeline = TextPipeline()
  78. offer_text_embedding = text_pipeline(offer_input)
  79. item_text_embedding = text_pipeline(item_input)
  80. similarity = CosineSimilarity(
  81. # activation='softmax',
  82. activation='relu',
  83. )([offer_text_embedding, item_text_embedding])
  84.  
  85. super().__init__(
  86. inputs=[offer_input, item_input],
  87. outputs=[similarity],
  88. name='brain',
  89. )
  90. self.compile(
  91. optimizer='adam',
  92. loss='mean_squared_error',
  93. )
  94.  
  95.  
# Fixed batch of 32 "offer" texts, 4 token ids each.
# NOTE(review): several rows are all zeros (fully padded); with
# mask_zero=True in the embedding these sequences are entirely masked —
# a likely contributor to the NaN weights printed at the bottom. Confirm.
offers = np.array([[     0,      0,      0,      0],
                   [    72,   1536,    408, 120614],
                   [   842,  74200,    665,   4342],
                   [ 17175,  24987,  13786,   1062],
                   [   421,   1439,      6,     63],
                   [    72,  23805,  33627,  24301],
                   [  2977,      2, 133735,  16077],
                   [    72,   2081,  77235,  77234],
                   [   390,    347,      2,      2],
                   [    11,      2,   6339,      2],
                   [ 35016,  98448,     10,  18498],
                   [  3654, 149776,   2395,    701],
                   [  1413,    361,      4,   6906],
                   [   712,   9577,      2,  52794],
                   [    11,      2,  34073,      2],
                   [ 50408,  12263,   9709,  39344],
                   [    11,      2,   2225,      2],
                   [    72, 133515,  18916,   7087],
                   [     0,      0,      0,      0],
                   [   781,     51,      2,    947],
                   [    72,   7585,  86920,      2],
                   [     0,      0,      0,      0],
                   [ 26244,  11879,  17887,  92594],
                   [ 35016,  98448,     10,  18498],
                   [  4236,  15909,     39,   6955],
                   [  6955,     59,     71,   5225],
                   [ 50408,   1436,   8509,   6922],
                   [ 84705,    141,    106,      2],
                   [   390,    347,     14,    209],
                   [   426,     78,   1291,    385],
                   [   918,      2,    892,      2],
                   [  1766,      2,   1539, 146045]], dtype='int32')
# Fixed batch of 32 "item" texts, 4 token ids each; row i is the positive
# match for offers row i (the training target below is the identity matrix).
items = np.array([[  2777,  10985,   8358,   2777],
                  [120614,  10618,  38127,  14625],
                  [ 73393,  37841,    535,    842],
                  [ 17175,  24987,  13786,   1062],
                  [ 14415, 107785, 107783,  89041],
                  [ 23805,  33627,  24301,  23805],
                  [  2977,      2,   2040,  72082],
                  [ 77235,  77234,    793,   7949],
                  [100147, 100228,     28,     96],
                  [  6339,  65544,  35803,  13368],
                  [ 56546,  35016,  98448,  31056],
                  [ 34687,   3747,      2,  73159],
                  [ 15970,  43435,      2,  43438],
                  [ 38108,  77294,  92322,     96],
                  [ 34073,   2270,   2093,    121],
                  [ 39344,  39356,  12263,   9709],
                  [  2225, 122243,     19,   1916],
                  [  5076,  60819,  17653,   4958],
                  [  6339,  36379,  36470,  67052],
                  [  2752,   9196,  20315,      2],
                  [  7585,  86920,      2, 113815],
                  [  2041,   3800,     79,    728],
                  [ 26244,  11879,  17887,  92594],
                  [ 56546,  35016,   5651,  98446],
                  [ 94436,  20753,      2,   2026],
                  [ 24701,  24988,      2,  24998],
                  [ 41505,    835,   7559,      2],
                  [ 70324,  70323,      2,  70328],
                  [  2933,    840,  88005,      2],
                  [ 86927,  92144,      2,  92143],
                  [  2752,  14990,      2,  18987],
                  [  4999,   3191,  85175,    758]], dtype='int32')
  160. diag_matrix = np.diag(np.ones((BATCH_SIZE,)))
  161.  
  162.  
  163. def generate_batches():
  164. while True:
  165. yield (
  166. {
  167. 'offer_input': offers,
  168. 'item_input': items,
  169. },
  170. diag_matrix,
  171. )
  172.  
  173.  
# Train briefly on the fixed batch, then inspect learned weights.
# NOTE(review): fit_generator is the TF1-era API (deprecated in favor of
# fit(...) in later Keras); kept as-is to match the rest of this script.
brain = Brain()
brain.fit_generator(generate_batches(), epochs=1, steps_per_epoch=4)
# brain.layers[2] is the shared TextPipeline (after the two input layers);
# presumably .layers[3] inside it is the Dense head (embedding, LSTM, LSTM,
# Dense) — verify against the model summary. weights[0] is its kernel.
print(brain.layers[2].layers[3].weights[0].eval(session=ks.backend.get_session()))
# Observed output after training — every entry is NaN:
# [[nan nan nan ... nan nan nan]
#  [nan nan nan ... nan nan nan]
#  [nan nan nan ... nan nan nan]
#  ...
#  [nan nan nan ... nan nan nan]
#  [nan nan nan ... nan nan nan]
#  [nan nan nan ... nan nan nan]]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement