Advertisement
Guest User

Untitled

a guest
Mar 26th, 2019
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.95 KB | None | 0 0
  1. import numpy as np
  2. import matplotlib.pyplot as plt
  3.  
  4. class Random2DGaussian:
  5. """Random bivariate normal distribution sampler
  6.  
  7. Hardwired parameters:
  8. d0min,d0max: horizontal range for the mean
  9. d1min,d1max: vertical range for the mean
  10. scalecov: controls the covariance range
  11.  
  12. Methods:
  13. __init__: creates a new distribution
  14.  
  15. get_sample(n): samples n datapoints
  16.  
  17. """
  18.  
  19. d0min = 0
  20. d0max = 10
  21. d1min = 0
  22. d1max = 10
  23. scalecov = 5
  24.  
  25. def __init__(self):
  26. dw0, dw1 = self.d0max - self.d0min, self.d1max - self.d1min
  27. mean = (self.d0min, self.d1min)
  28. mean += np.random.random_sample(2) * (dw0, dw1)
  29. eigvals = np.random.random_sample(2)
  30. eigvals *= (dw0 / self.scalecov, dw1 / self.scalecov)
  31. eigvals **= 2
  32. theta = np.random.random_sample() * np.pi * 2
  33. R = [[np.cos(theta), -np.sin(theta)],
  34. [np.sin(theta), np.cos(theta)]]
  35. Sigma = np.dot(np.dot(np.transpose(R), np.diag(eigvals)), R)
  36. self.get_sample = lambda n: np.random.multivariate_normal(mean, Sigma, n)
  37.  
  38.  
  39. def graph_surface(function, rect, offset=0.5, width=256, height=256):
  40. """Creates a surface plot (visualize with plt.show)
  41.  
  42. Arguments:
  43. function: surface to be plotted
  44. rect: function domain provided as:
  45. ([x_min,y_min], [x_max,y_max])
  46. offset: the level plotted as a contour plot
  47.  
  48. Returns:
  49. None
  50. """
  51.  
  52. lsw = np.linspace(rect[0][1], rect[1][1], width)
  53. lsh = np.linspace(rect[0][0], rect[1][0], height)
  54. xx0, xx1 = np.meshgrid(lsh, lsw)
  55. grid = np.stack((xx0.flatten(), xx1.flatten()), axis=1)
  56.  
  57. # get the values and reshape them
  58. values = function(grid).reshape((width, height))
  59.  
  60. # fix the range and offset
  61. delta = offset if offset else 0
  62. maxval = max(np.max(values) - delta, - (np.min(values) - delta))
  63.  
  64. # draw the surface and the offset
  65. plt.pcolormesh(xx0, xx1, values,
  66. vmin=delta - maxval, vmax=delta + maxval)
  67.  
  68. if offset != None:
  69. plt.contour(xx0, xx1, values, colors='black', levels=[offset])
  70.  
  71.  
  72. def graph_data(X, Y_, Y, special=[]):
  73. """Creates a scatter plot (visualize with plt.show)
  74.  
  75. Arguments:
  76. X: datapoints
  77. Y_: groundtruth classification indices
  78. Y: predicted class indices
  79. special: use this to emphasize some points
  80.  
  81. Returns:
  82. None
  83. """
  84. # colors of the datapoint markers
  85. palette = ([0.5, 0.5, 0.5], [1, 1, 1], [0.2, 0.2, 0.2])
  86. colors = np.tile([0.0, 0.0, 0.0], (Y_.shape[0], 1))
  87. for i in range(len(palette)):
  88. colors[Y_ == i] = palette[i]
  89.  
  90. # sizes of the datapoint markers
  91. sizes = np.repeat(20, len(Y_))
  92. sizes[special] = 40
  93.  
  94. # draw the correctly classified datapoints
  95. good = (Y_ == Y)
  96. plt.scatter(X[good, 0], X[good, 1], c=colors[good],
  97. s=sizes[good], marker='o')
  98.  
  99. # draw the incorrectly classified datapoints
  100. bad = (Y_ != Y)
  101. plt.scatter(X[bad, 0], X[bad, 1], c=colors[bad],
  102. s=sizes[bad], marker='s')
  103.  
  104.  
  105. def class_to_onehot(Y):
  106. Yoh = np.zeros((len(Y), max(Y) + 1))
  107. Yoh[range(len(Y)), Y] = 1
  108. return Yoh
  109.  
  110.  
  111. def eval_perf_binary(Y, Y_):
  112. tp = sum(np.logical_and(Y == Y_, Y_ == True))
  113. fn = sum(np.logical_and(Y != Y_, Y_ == True))
  114. tn = sum(np.logical_and(Y == Y_, Y_ == False))
  115. fp = sum(np.logical_and(Y != Y_, Y_ == False))
  116. recall = tp / (tp + fn)
  117. precision = tp / (tp + fp)
  118. accuracy = (tp + tn) / (tp + fn + tn + fp)
  119. return accuracy, recall, precision
  120.  
  121.  
  122. def eval_perf_multi(Y, Y_):
  123. pr = []
  124. n = max(Y_) + 1
  125. M = np.bincount(n * Y_ + Y, minlength=n * n).reshape(n, n)
  126. for i in range(n):
  127. tp_i = M[i, i]
  128. fn_i = np.sum(M[i, :]) - tp_i
  129. fp_i = np.sum(M[:, i]) - tp_i
  130. tn_i = np.sum(M) - fp_i - fn_i - tp_i
  131. recall_i = tp_i / (tp_i + fn_i)
  132. precision_i = tp_i / (tp_i + fp_i)
  133. pr.append((recall_i, precision_i))
  134.  
  135. accuracy = np.trace(M) / np.sum(M)
  136.  
  137. return accuracy, pr, M
  138.  
  139.  
  140. def eval_AP(ranked_labels):
  141. """Recovers AP from ranked labels"""
  142.  
  143. n = len(ranked_labels)
  144. pos = sum(ranked_labels)
  145. neg = n - pos
  146.  
  147. tp = pos
  148. tn = 0
  149. fn = 0
  150. fp = neg
  151.  
  152. sumprec = 0
  153. # IPython.embed()
  154. for x in ranked_labels:
  155. precision = tp / (tp + fp)
  156. recall = tp / (tp + fn)
  157.  
  158. if x:
  159. sumprec += precision
  160.  
  161. # print (x, tp,tn,fp,fn, precision, recall, sumprec)
  162. # IPython.embed()
  163.  
  164. tp -= x
  165. fn += x
  166. fp -= not x
  167. tn += not x
  168.  
  169. return sumprec / pos
  170.  
  171.  
  172. def sample_gauss_2d(nclasses, nsamples):
  173. # create the distributions and groundtruth labels
  174. Gs = []
  175. Ys = []
  176. for i in range(nclasses):
  177. Gs.append(Random2DGaussian())
  178. Ys.append(i)
  179.  
  180. # sample the dataset
  181. X = np.vstack([G.get_sample(nsamples) for G in Gs])
  182. Y_ = np.hstack([[Y] * nsamples for Y in Ys])
  183.  
  184. return X, Y_
  185.  
  186.  
  187. def sample_gmm_2d(ncomponents, nclasses, nsamples):
  188. # create the distributions and groundtruth labels
  189. Gs = []
  190. Ys = []
  191. for i in range(ncomponents):
  192. Gs.append(Random2DGaussian())
  193. Ys.append(np.random.randint(nclasses))
  194.  
  195. # sample the dataset
  196. X = np.vstack([G.get_sample(nsamples) for G in Gs])
  197. Y_ = np.hstack([[Y] * nsamples for Y in Ys])
  198.  
  199. return X, Y_
  200.  
  201.  
  202. def myDummyDecision(X):
  203. scores = X[:, 0] + X[:, 1] - 5
  204. return scores
  205.  
  206.  
  207. if __name__ == "__main__":
  208. np.random.seed(100)
  209.  
  210. # get data
  211. X, Y_ = sample_gmm_2d(4, 2, 30)
  212.  
  213. # get the class predictions
  214. Y = myDummyDecision(X) > 0.5
  215.  
  216. # graph the decision surface
  217. rect = (np.min(X, axis=0), np.max(X, axis=0))
  218. graph_surface(myDummyDecision, rect, offset=0)
  219.  
  220. # graph the data points
  221. graph_data(X, Y_, Y, special=[])
  222.  
  223. plt.show()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement