Guest User

Untitled

a guest
Aug 6th, 2017
115
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.50 KB | None | 0 0
  1. import tensorflow as tf
  2. import numpy as np
  3. from matplotlib.pyplot import cm
  4. import matplotlib.pyplot as plt
  5. from sklearn.datasets.samples_generator import make_blobs
  6.  
  7. def categorical_scatter_2d(X2D, class_idxs, ms=3, ax=None, alpha=1.0):
  8.     ## Plot a 2D matrix with corresponding class labels: each class diff colour
  9.     if ax is None:
  10.         fig, ax = plt.subplots()
  11.     classes = np.unique(class_idxs)
  12.     colors = cm.rainbow(np.linspace(0,1, len(classes)))
  13.     for i, cls in enumerate(classes):
  14.         ax.scatter(X2D[class_idxs==cls, 0], X2D[class_idxs==cls, 1],
  15.                    label=str(cls), alpha=alpha, color=colors[i])
  16.     return ax
  17.  
  18. def cos_sim(A):
  19.     similarity = np.dot(A, A.T)
  20.    
  21.     # squared magnitude of preference vectors (number of occurrences)
  22.     square_mag = np.diag(similarity)
  23.    
  24.     # inverse squared magnitude
  25.     inv_square_mag = 1 / square_mag
  26.    
  27.     # if it doesn't occur, set it's inverse magnitude to zero (instead of inf)
  28.     inv_square_mag[np.isinf(inv_square_mag)] = 0
  29.    
  30.     # inverse of the magnitude
  31.     inv_mag = np.sqrt(inv_square_mag)
  32.    
  33.     # cosine similarity (elementwise multiply by inverse magnitudes)
  34.     cosine = similarity * inv_mag
  35.     cosine = cosine.T * inv_mag
  36.     return cosine
  37.  
  38. # First we make the 'true' latent space for generating our observed pair
  39. # similarities
  40. N = 500
  41. # This all still works if the latent space is higher dimensional
  42. D = 2
  43. z_true, labels = make_blobs(n_samples=N, cluster_std=1., centers=10,
  44.                             n_features=D, random_state=2)
  45. categorical_scatter_2d(z_true, labels)
  46. plt.show()
  47. # Create synthetic 'pair probabilities' based on cosine similarity or sigmoid
  48. # of dot product. I found the latter works better. You might need to ensure
  49. # your input data is spiky, or play with other aspects to get it to work if not
  50.  
  51. #sims = np.minimum(np.maximum(cos_sim(z_true) / 2. + 0.5, 0.0), 1.0) ** 10
  52. sims = 1. / (1. + np.exp(-np.dot(z_true, z_true.T)))
  53. plt.hist(sims.reshape(-1))
  54. plt.show()
  55.  
  56. # Now we make a model to find latent vectors whose similarity predicts our
  57. # pair probabilities well
  58.  
  59. tf.reset_default_graph()
  60.  
  61. # Dimensionality of our estimated latent space
  62. D_hat = 2
  63.  
  64. # Iniitialize a latent vector for each of our examples
  65. zhat = tf.get_variable('zhat', [N, D_hat],
  66.                        initializer=tf.random_normal_initializer(0, 1.0))
  67. # We have the target probabilities in a matrix
  68. # Note there's no means to deal with the random censoring in your data
  69. # You could just eliminate these pairs the loss calculation with a mask
  70. p_target = tf.constant(sims.astype(np.float32), name='target')
  71.  
  72. # Estimated probability of a matching pair
  73. norm = tf.reduce_sum(tf.square(zhat), 1, keep_dims=True)
  74. zhat_normd = zhat / norm
  75. p_hat = tf.nn.sigmoid(tf.matmul(zhat_normd, zhat_normd, transpose_b=True))
  76.  
  77. # Cross entropy cost
  78. xent_mat = p_target * tf.log(p_hat + 1e-8) + \
  79.             (1 - p_target) * tf.log(1 - p_hat + 1e-8)
  80. # Mean of cross-entropy --> ignore elements on the diagonal which have prob=1.0
  81. xent = -tf.reduce_mean(xent_mat * -(tf.diag(tf.ones([tf.shape(p_hat)[0]]))-1))
  82.  
  83. # Train op + init
  84. train_op = tf.train.AdamOptimizer(0.01).minimize(xent)
  85. sess = tf.InteractiveSession()
  86. sess.run(tf.global_variables_initializer())
  87.  
  88. # Train then plot the latent space
  89. for i in range(1000):
  90.     if i % 100 == 0:
  91.         print sess.run([xent, train_op])[0]
  92.     else:
  93.         sess.run(train_op)
  94. zhat_ = sess.run(zhat_normd)
  95.  
  96. # You obviously won't know the classes for the latent space, but maybe you can
  97. # run k-means or other clustering on it...
  98. categorical_scatter_2d(zhat_, labels)
  99.  
  100. #==============================================================================
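  101. # # Below here is a minibatch version that doesn't seem to work. Likely causes
       # # (to verify): `sims[perm]` permutes only the rows of sims, so entry (i, j)
       # # no longer matches the pair (i, j); `t_sub` is laid out as the transpose of
       # # `p_hat_sub`; and zhat is not rescaled here as it is in the full-batch model.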
  102. # perm = np.random.permutation(len(sims))
  103. # sims, labels = sims[perm], labels[perm]
  104. #
  105. # batch_size = 32
  106. # tf.reset_default_graph()
  107. #
  108. # D_hat = 2
  109. # zhat = tf.get_variable('zhat', [N, D_hat],
  110. #                        initializer=tf.random_normal_initializer(0, 1.0))
  111. #            
  112. # target = tf.constant(sims.astype(np.float32), name='target')
  113. #
  114. # selection_i = tf.random_uniform([batch_size], 0, tf.shape(target)[0], tf.int32)
  115. # selection_j = tf.random_uniform([batch_size], 0, tf.shape(target)[0], tf.int32)
  116. # #selection_i = tf.range(0, batch_size)
  117. # #selection_j = tf.range(0, batch_size)
  118. #
  119. # z_i = tf.gather(zhat, selection_i)
  120. # z_j = tf.gather(zhat, selection_j)
  121. #
  122. # sess = tf.InteractiveSession()
  123. # sess.run(tf.global_variables_initializer())
  124. # zhat_ = sess.run(zhat)
  125. # p_hat_sub = tf.nn.sigmoid(tf.matmul(z_i, z_j, transpose_b=True))
  126. # t_sub = tf.gather(tf.transpose(tf.gather(target, selection_i)), selection_j)
  127. #
  128. # xent_mat = t_sub * tf.log(p_hat_sub) + (1 - t_sub) * tf.log(1 - p_hat_sub)
  129. # xent_sub = -tf.reduce_mean(xent_mat * -(tf.diag(tf.ones([batch_size]))-1))
  130. #
  131. #
  132. # train_op = tf.train.AdamOptimizer(0.01).minimize(xent_sub)
  133. #
  134. # sess = tf.InteractiveSession()
  135. # sess.run(tf.global_variables_initializer())
  136. #
  137. # zhat_ = sess.run(zhat)
  138. # categorical_scatter_2d(zhat_, labels)
  139. #
  140. # #%%
  141. # j = 0
  142. # for i in range(int(1000 / (batch_size / 500.))):
  143. #    
  144. #     if i % (int(100 / (batch_size / 500.))) == 0:
  145. #         print sess.run([xent_sub, train_op], {})[0]
  146. #     else:
  147. #         sess.run(train_op, {})
  148. #     j += 1
  149. #     if j >= 500 / batch_size:
  150. #         j = 0
  151. # zhat_ = sess.run(zhat)
  152. # categorical_scatter_2d(zhat_, labels)
  153. #
  154. #==============================================================================
Advertisement
Add Comment
Please, Sign In to add comment