Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from scipy.stats import norm
class Naive_Bayes_Classifier(object):
    """Gaussian naive Bayes classifier.

    Assumes features are conditionally independent given the class and
    normally distributed within each class. ``train`` estimates per-class
    priors, means, and standard deviations; ``predict`` returns the
    maximum-a-posteriori class label for each observation.
    """

    def train(self, X, y):
        """Fit class priors and class-wise feature means / standard deviations.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training observations.
        y : ndarray of shape (n_samples,)
            Class labels; need not be consecutive integers starting at 0.
        """
        # Sorted unique labels give a stable label -> row mapping. The
        # original indexed self.prior / self.c_mean by the raw label value,
        # which breaks for non-contiguous or non-integer labels.
        self.classes = np.array(sorted(set(y)))
        n_classes = len(self.classes)
        n_features = X.shape[1]
        # Population statistics (kept for backward compatibility; the
        # evidence term is constant per observation and cancels in argmax).
        self.mean = np.mean(X, axis=0)
        self.std = np.std(X, axis=0)
        # Class-conditional statistics and priors.
        self.c_mean = np.zeros((n_classes, n_features))
        self.c_std = np.zeros((n_classes, n_features))
        self.prior = np.zeros(n_classes)
        for i, c in enumerate(self.classes):
            indices = np.where(y == c)
            self.prior[i] = indices[0].shape[0] / float(y.shape[0])
            self.c_mean[i] = np.mean(X[indices], axis=0)
            self.c_std[i] = np.std(X[indices], axis=0)

    def predict(self, X):
        """Return the most probable class label for each row of X.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Observations to classify.

        Returns
        -------
        list
            Predicted class labels (entries of ``self.classes``), one per row.
        """
        p = []
        for obs in X:
            # Proper Gaussian density: norm.pdf(x, loc, scale) includes the
            # 1/scale normalization. The original norm.pdf((x - mu) / sigma)
            # omitted it, which distorts the likelihood ratio whenever class
            # standard deviations differ and can flip the argmax.
            likelihood = np.prod(norm.pdf(obs, self.c_mean, self.c_std), axis=1)
            # Evidence P(obs) is identical for every class, so dividing by it
            # cannot change the argmax — it is omitted entirely.
            posterior = self.prior * likelihood
            p.append(self.classes[np.argmax(posterior)])
        return p
if __name__ == "__main__":
    # Build two well-separated 2-D Gaussian clusters so the classifier is
    # easy to sanity-check by eye; the fixed seed makes the run reproducible.
    np.random.seed(100)
    n = 100
    cat_1 = np.array([np.random.normal(0, 1, n), np.random.normal(3, 1, n)]).T
    cat_2 = np.array([np.random.normal(3, 1, n), np.random.normal(0, 1, n)]).T
    X = np.vstack((cat_1, cat_2))
    y = np.array([0 for _ in cat_1] + [1 for _ in cat_2])
    # Train on the full data set and predict it back.
    clf = Naive_Bayes_Classifier()
    clf.train(X, y)
    p = clf.predict(X)
    for x, prediction, actual in zip(X, p, y):
        # print() function: the original used the Python 2 print statement,
        # which is a SyntaxError under Python 3.
        print(x, actual, prediction)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement