Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from __future__ import division

import math

import numpy as np
from sklearn.base import BaseEstimator
class NaiveBayesNominal:
    """Naive Bayes classifier for nominal (integer-coded categorical) features.

    Estimates class priors P(y) and per-feature conditional likelihoods
    P(x_i = v | y) from frequency counts, then classifies by maximizing
    P(y) * prod_i P(x_i | y).  No smoothing is applied, so a (value, class)
    combination never seen during fit raises KeyError at prediction time.
    """

    def __init__(self):
        # x_prior[feature_index][(value, class)] -> P(x_feature = value | y = class)
        self.x_prior = dict()
        # y_prior[class] -> P(y = class)
        self.y_prior = dict()

    def fit(self, X, y):
        """Estimate class priors and per-feature conditional likelihoods.

        Parameters
        ----------
        X : array of shape (n_samples, n_features), integer-coded nominal values
        y : array of shape (n_samples,), integer class labels
        """
        classes, class_counts = np.unique(y, return_counts=True)
        n_samples = len(y)
        self.y_prior = {c: count / n_samples for c, count in zip(classes, class_counts)}
        class_count_map = dict(zip(classes, class_counts))
        for feature_index, feature_values in enumerate(X.T):
            # Count co-occurrences of (value, class) pairs for this feature.
            pair_counts = {}
            for value, cls in zip(feature_values, y):
                key = (value, cls)
                pair_counts[key] = pair_counts.get(key, 0) + 1
            # BUG FIX: the original divided by count(x) (the attribute-value
            # frequency), which estimates P(y | x).  Naive Bayes needs the
            # likelihood P(x | y) = count(x, y) / count(y); otherwise each
            # feature multiplies in a spurious extra prior factor.
            self.x_prior[feature_index] = {
                key: count / class_count_map[key[1]]
                for key, count in pair_counts.items()
            }

    def predict_proba(self, X):
        """Return an array of dicts mapping class label -> posterior probability.

        Raises KeyError for any (value, class) pair unseen during fit,
        since no smoothing is applied.
        """
        results = []
        for row in X:
            scores = {}
            for cls, prior in self.y_prior.items():
                p = prior
                for feature_index, value in enumerate(row):
                    p *= self.x_prior[feature_index][(value, cls)]
                scores[cls] = p
            # Normalize so the per-sample posteriors sum to 1.
            # (Python-3 fix: the original used dict.itervalues()/iteritems().)
            total = sum(scores.values())
            results.append({cls: p / total for cls, p in scores.items()})
        return np.array(results)

    def predict(self, X):
        """Return the most probable class label for each sample in X."""
        return np.array([max(proba, key=proba.get) for proba in self.predict_proba(X)])
class NaiveBayesGaussian:
    """Gaussian Naive Bayes classifier for continuous features.

    Models each feature, conditioned on the class, as a univariate normal
    distribution whose mean and (population) standard deviation are
    estimated from the training data.
    """

    def __init__(self):
        # y_prior[class] -> P(y = class)
        self.y_prior = dict()
        # x_parameters[feature_index][class] -> (mean, std) of that feature within the class
        self.x_parameters = dict()

    def fit(self, X, y):
        """Estimate class priors and per-class (mean, std) for every feature.

        Parameters
        ----------
        X : array of shape (n_samples, n_features), continuous values
        y : array of shape (n_samples,), integer class labels
        """
        classes, class_counts = np.unique(y, return_counts=True)
        n_samples = len(y)
        self.y_prior = {c: count / n_samples for c, count in zip(classes, class_counts)}
        for feature_index, feature_values in enumerate(X.T):
            values_per_class = dict()
            for cls, value in zip(y, feature_values):
                values_per_class.setdefault(cls, []).append(value)
            # np.std uses ddof=0 (population std), matching the original.
            self.x_parameters[feature_index] = {
                cls: (np.mean(vals), np.std(vals))
                for cls, vals in values_per_class.items()
            }

    def predict_proba(self, X):
        """Return an array of dicts mapping class label -> posterior probability."""
        results = []
        for row in X:
            scores = {}
            for cls, prior in self.y_prior.items():
                p = prior
                for feature_index, value in enumerate(row):
                    mean, std = self.x_parameters[feature_index][cls]
                    p *= self.gaussian(mean, std, value)
                scores[cls] = p
            # Normalize so the per-sample posteriors sum to 1.
            # (Python-3 fix: the original used dict.itervalues()/iteritems().)
            total = sum(scores.values())
            results.append({cls: p / total for cls, p in scores.items()})
        return np.array(results)

    def predict(self, X):
        """Return the most probable class label for each sample in X."""
        return np.array([max(proba, key=proba.get) for proba in self.predict_proba(X)])

    def gaussian(self, mean, std, x):
        """Normal probability density N(x; mean, std).

        BUG FIX: the exponent was `-(x - mean) * 2 / (2 * std * 2)` — a
        mangled `**` — which is not the Gaussian density; the correct
        exponent is -(x - mean)**2 / (2 * std**2).
        """
        e_part = math.exp(-((x - mean) ** 2) / (2 * std ** 2))
        return e_part / (math.sqrt(2 * math.pi) * std)
class NaiveBayesNumNom(BaseEstimator):
    """Unimplemented stub for a Naive Bayes classifier over mixed
    numeric and nominal features; every method raises NotImplementedError.
    """

    def __init__(self, is_cat=None, m=0.0):
        # is_cat: presumably a per-feature mask marking categorical columns — TODO confirm
        # m: presumably an m-estimate smoothing parameter — TODO confirm
        raise NotImplementedError

    def fit(self, X, yy):
        # Not implemented.
        raise NotImplementedError

    def predict_proba(self, X):
        # Not implemented.
        raise NotImplementedError

    def predict(self, X):
        # Not implemented.
        raise NotImplementedError
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement