Guest User

Untitled

a guest
Oct 24th, 2018
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.80 KB | None | 0 0
  1. #
  2. # tsne.py
  3. #
  4. # Implementation of t-SNE in Python. The implementation was tested on Python
  5. # 2.7.10, and it requires a working installation of NumPy. The implementation
  6. # comes with an example on the MNIST dataset. In order to plot the
  7. # results of this example, a working installation of matplotlib is required.
  8. #
  9. # The example can be run by executing: `ipython tsne.py`
  10. #
  11. #
  12. # Created by Laurens van der Maaten on 20-12-08.
  13. # Copyright (c) 2008 Tilburg University. All rights reserved.
  14.  
  15. import numpy as np
  16. import pylab
  17.  
  18.  
  19. def Hbeta(D=np.array([]), beta=1.0):
  20. """
  21. Compute the perplexity and the P-row for a specific value of the
  22. precision of a Gaussian distribution.
  23. """
  24.  
  25. # Compute P-row and corresponding perplexity
  26. P = np.exp(-D.copy() * beta)
  27. sumP = sum(P)
  28. H = np.log(sumP) + beta * np.sum(D * P) / sumP
  29. P = P / sumP
  30. return H, P
  31.  
  32.  
  33. def x2p(X=np.array([]), tol=1e-5, perplexity=30.0):
  34. """
  35. Performs a binary search to get P-values in such a way that each
  36. conditional Gaussian has the same perplexity.
  37. """
  38.  
  39. # Initialize some variables
  40. print("Computing pairwise distances...")
  41. (n, d) = X.shape
  42. sum_X = np.sum(np.square(X), 1)
  43. D = np.add(np.add(-2 * np.dot(X, X.T), sum_X).T, sum_X)
  44. P = np.zeros((n, n))
  45. beta = np.ones((n, 1))
  46. logU = np.log(perplexity)
  47.  
  48. # Loop over all datapoints
  49. for i in range(n):
  50.  
  51. # Print progress
  52. if i % 500 == 0:
  53. print("Computing P-values for point %d of %d..." % (i, n))
  54.  
  55. # Compute the Gaussian kernel and entropy for the current precision
  56. betamin = -np.inf
  57. betamax = np.inf
  58. Di = D[i, np.concatenate((np.r_[0:i], np.r_[i+1:n]))]
  59. (H, thisP) = Hbeta(Di, beta[i])
  60.  
  61. # Evaluate whether the perplexity is within tolerance
  62. Hdiff = H - logU
  63. tries = 0
  64. while np.abs(Hdiff) > tol and tries < 50:
  65.  
  66. # If not, increase or decrease precision
  67. if Hdiff > 0:
  68. betamin = beta[i].copy()
  69. if betamax == np.inf or betamax == -np.inf:
  70. beta[i] = beta[i] * 2.
  71. else:
  72. beta[i] = (beta[i] + betamax) / 2.
  73. else:
  74. betamax = beta[i].copy()
  75. if betamin == np.inf or betamin == -np.inf:
  76. beta[i] = beta[i] / 2.
  77. else:
  78. beta[i] = (beta[i] + betamin) / 2.
  79.  
  80. # Recompute the values
  81. (H, thisP) = Hbeta(Di, beta[i])
  82. Hdiff = H - logU
  83. tries += 1
  84.  
  85. # Set the final row of P
  86. P[i, np.concatenate((np.r_[0:i], np.r_[i+1:n]))] = thisP
  87.  
  88. # Return final P-matrix
  89. print("Mean value of sigma: %f" % np.mean(np.sqrt(1 / beta)))
  90. return P
  91.  
  92.  
  93. def pca(X=np.array([]), no_dims=50):
  94. """
  95. Runs PCA on the NxD array X in order to reduce its dimensionality to
  96. no_dims dimensions.
  97. """
  98.  
  99. print("Preprocessing the data using PCA...")
  100. (n, d) = X.shape
  101. X = X - np.tile(np.mean(X, 0), (n, 1))
  102. (l, M) = np.linalg.eig(np.dot(X.T, X))
  103. Y = np.dot(X, M[:, 0:no_dims])
  104. return Y
  105.  
  106.  
  107. def tsne(X=np.array([]), no_dims=2, initial_dims=50, perplexity=30.0):
  108. """
  109. Runs t-SNE on the dataset in the NxD array X to reduce its
  110. dimensionality to no_dims dimensions. The syntaxis of the function is
  111. `Y = tsne.tsne(X, no_dims, perplexity), where X is an NxD NumPy array.
  112. """
  113.  
  114. # Check inputs
  115. if isinstance(no_dims, float):
  116. print("Error: array X should have type float.")
  117. return -1
  118. if round(no_dims) != no_dims:
  119. print("Error: number of dimensions should be an integer.")
  120. return -1
  121.  
  122. # Initialize variables
  123. X = pca(X, initial_dims).real
  124. (n, d) = X.shape
  125. max_iter = 1000
  126. initial_momentum = 0.5
  127. final_momentum = 0.8
  128. eta = 500
  129. min_gain = 0.01
  130. Y = np.random.randn(n, no_dims)
  131. dY = np.zeros((n, no_dims))
  132. iY = np.zeros((n, no_dims))
  133. gains = np.ones((n, no_dims))
  134.  
  135. # Compute P-values
  136. P = x2p(X, 1e-5, perplexity)
  137. P = P + np.transpose(P)
  138. P = P / np.sum(P)
  139. P = P * 4. # early exaggeration
  140. P = np.maximum(P, 1e-12)
  141.  
  142. # Run iterations
  143. for iter in range(max_iter):
  144.  
  145. # Compute pairwise affinities
  146. sum_Y = np.sum(np.square(Y), 1)
  147. num = -2. * np.dot(Y, Y.T)
  148. num = 1. / (1. + np.add(np.add(num, sum_Y).T, sum_Y))
  149. num[range(n), range(n)] = 0.
  150. Q = num / np.sum(num)
  151. Q = np.maximum(Q, 1e-12)
  152.  
  153. # Compute gradient
  154. PQ = P - Q
  155. for i in range(n):
  156. dY[i, :] = np.sum(np.tile(PQ[:, i] * num[:, i], (no_dims, 1)).T * (Y[i, :] - Y), 0)
  157.  
  158. # Perform the update
  159. if iter < 20:
  160. momentum = initial_momentum
  161. else:
  162. momentum = final_momentum
  163. gains = (gains + 0.2) * ((dY > 0.) != (iY > 0.)) +
  164. (gains * 0.8) * ((dY > 0.) == (iY > 0.))
  165. gains[gains < min_gain] = min_gain
  166. iY = momentum * iY - eta * (gains * dY)
  167. Y = Y + iY
  168. Y = Y - np.tile(np.mean(Y, 0), (n, 1))
  169.  
  170. # Compute current value of cost function
  171. if (iter + 1) % 10 == 0:
  172. C = np.sum(P * np.log(P / Q))
  173. print("Iteration %d: error is %f" % (iter + 1, C))
  174.  
  175. # Stop lying about P-values
  176. if iter == 100:
  177. P = P / 4.
  178.  
  179. # Return solution
  180. return Y
  181.  
  182.  
  183. if __name__ == "__main__":
  184. print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
  185. print("Running example on 2,500 MNIST digits...")
  186. #X = np.loadtxt("mnist2500_X.txt")
  187. X = np.loadtxt("resnet50_feature_vectors.txt")
  188. #labels = np.loadtxt("mnist2500_labels.txt")
  189. labels = np.loadtxt("august_labels.txt")
  190. Y = tsne(X, 2, 2048, 20.0)
  191. pylab.scatter(Y[:, 0], Y[:, 1], 20, labels)
  192. pylab.show()
Add Comment
Please, Sign In to add comment