Advertisement
Guest User

Untitled

a guest
Jun 27th, 2016
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.13 KB | None | 0 0
  1. import numpy as np
  2. from scipy.stats import norm
  3.  
  4. class Naive_Bayes_Classifier(object):
  5.  
  6. def train (self, X, y):
  7.  
  8. """
  9. Calculates population and class-wise mean and standard deviation
  10. """
  11.  
  12. # Population mean and standard deviation
  13.  
  14. self.classes = set(y)
  15. self.mean = np.mean(X, axis=0)
  16. self.std = np.std(X, axis=0)
  17.  
  18. # Class mean and standard deviation
  19.  
  20. self.c_mean = np.zeros((len(self.classes), X.shape[1]))
  21. self.c_std = np.zeros((len(self.classes), X.shape[1]))
  22. self.prior = np.zeros((len(self.classes),))
  23.  
  24. for c in self.classes:
  25. indices = np.where(y == c)
  26. self.prior[c] = indices[0].shape[0] / float(y.shape[0])
  27. self.c_mean[c] = np.mean(X[indices], axis=0)
  28. self.c_std[c] = np.std(X[indices], axis=0)
  29.  
  30. return
  31.  
  32. def predict (self, X):
  33.  
  34. """
  35. Calculates observations' posteriors and returns class with
  36. maximum posterior.
  37. """
  38.  
  39. p = []
  40.  
  41. for obs in X:
  42.  
  43. tiled = np.repeat([obs], len(self.classes), axis=0)
  44.  
  45. # Probability of observation in population
  46.  
  47. evidence = norm.pdf((self.mean - obs) / self.std)
  48. evidence = np.prod(evidence)
  49.  
  50. # Probability of observation in each class
  51.  
  52. likelihood = norm.pdf((tiled - self.c_mean) / self.c_std)
  53. likelihood = np.prod(likelihood, axis=1)
  54.  
  55. # Probability of each class given observation
  56.  
  57. posterior = self.prior * likelihood / evidence
  58. p.append(np.argmax(posterior))
  59.  
  60. return p
  61.  
  62. if __name__ == "__main__":
  63.  
  64. # Make some random, easy data
  65.  
  66. np.random.seed(100)
  67. n = 100
  68. cat_1 = np.array([np.random.normal(0, 1, n), np.random.normal(3, 1, n)]).T
  69. cat_2 = np.array([np.random.normal(3, 1, n), np.random.normal(0, 1, n)]).T
  70.  
  71. X = np.vstack((cat_1, cat_2))
  72. y = np.array([0 for _ in cat_1] + [1 for _ in cat_2])
  73.  
  74. # Train and predict
  75.  
  76. clf = Naive_Bayes_Classifier()
  77. clf.train(X, y)
  78. p = clf.predict(X)
  79.  
  80. for x, prediction, actual in zip(X, p, y):
  81. print x, actual, prediction
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement