Advertisement
Guest User

Untitled

a guest
Nov 13th, 2018
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.82 KB | None | 0 0
from __future__ import division

import math

import numpy as np
from sklearn.base import BaseEstimator
  7.  
  8.  
  9. class NaiveBayesNominal:
  10.     def __init__(self):
  11.         self.x_prior = dict()
  12.         self.y_prior = dict()
  13.  
  14.     def fit(self, X, y):
  15.         modelClasses, classesCounts = np.unique(y, return_counts=True)
  16.  
  17.         self.y_prior = dict(zip(modelClasses, [count / len(y) for count in classesCounts]))
  18.  
  19.         for attributeIndex, attributeValues in enumerate(X.T):
  20.             uniqueAttributesValues, valuesCounts = np.unique(attributeValues, return_counts=True)
  21.             attributeValuesCounts = dict(zip(uniqueAttributesValues, valuesCounts))
  22.             attributeValuesClassesPairs = np.array([(attributeValue, modelClass) for
  23.                                                     attributeValue, modelClass in zip(attributeValues, y)],
  24.                                            dtype=[('x', 'i4'), ('y', 'i4')])
  25.             uniqueAttributeClassPairs, pairsCounts = np.unique(attributeValuesClassesPairs, return_counts=True)
  26.  
  27.             self.x_prior[attributeIndex] = dict(zip([(uniqueAttributeValues, modelClass)
  28.                                                      for uniqueAttributeValues, modelClass in uniqueAttributeClassPairs],
  29.                 [pairCount / attributeValuesCounts[attributeClassPair[0]] for
  30.                 attributeClassPair, pairCount in zip(uniqueAttributeClassPairs, pairsCounts)]))
  31.  
  32.     def predict_proba(self, X):
  33.         results = []
  34.  
  35.         for attributesValues in X:
  36.             probabilities = dict()
  37.  
  38.             for modelClass in self.y_prior:
  39.                 probability = self.y_prior[modelClass]
  40.  
  41.                 for attributeIndex, attributeValue in enumerate(attributesValues):
  42.                     probability = probability * self.x_prior[attributeIndex][(attributeValue, modelClass)]
  43.  
  44.                 probabilities[modelClass] = probability
  45.             results.append(probabilities)
  46.  
  47.         normalizedResults = []
  48.  
  49.         for result in results:
  50.             factor = 1.0 / sum(result.itervalues())
  51.             normalizedResults.append({k: v * factor for k, v in result.iteritems()})
  52.  
  53.         return np.array(normalizedResults)
  54.  
  55.     def predict(self, X):
  56.         return np.array([max(probability, key=probability.get) for probability in self.predict_proba(X)])
  57.  
  58. class NaiveBayesGaussian:
  59.     def __init__(self):
  60.         self.y_prior = dict()
  61.         self.x_parameters = dict()
  62.  
  63.     def fit(self, X, y):
  64.         modelClasses, classesCounts = np.unique(y, return_counts=True)
  65.  
  66.         self.y_prior = dict(zip(modelClasses, [count / len(y) for count in classesCounts]))
  67.  
  68.         for attributeIndex, attributeValues in enumerate(X.T):
  69.             attributeValuesPerClass = dict()
  70.  
  71.             for modelClass, attributeValue in zip(y, attributeValues):
  72.                 attributeValuesPerClass.setdefault(modelClass, []).append(attributeValue)
  73.  
  74.             attributeParameters = dict()
  75.  
  76.             for modelClass in attributeValuesPerClass:
  77.                 attributeParameters[modelClass] = (np.mean(attributeValuesPerClass[modelClass]),
  78.                                                    np.std(attributeValuesPerClass[modelClass]))
  79.  
  80.             self.x_parameters[attributeIndex] = attributeParameters
  81.  
  82.     def predict_proba(self, X):
  83.         results = []
  84.  
  85.         for attributesValues in X:
  86.             probabilities = dict()
  87.  
  88.             for modelClass in self.y_prior:
  89.                 probability = self.y_prior[modelClass]
  90.  
  91.                 for attributeIndex, attributeValue in enumerate(attributesValues):
  92.                     probability = probability * self.gaussian(self.x_parameters[attributeIndex][modelClass][0],
  93.                                                               self.x_parameters[attributeIndex][modelClass][1],
  94.                                                               attributeValue)
  95.  
  96.                     probabilities[modelClass] = probability
  97.             results.append(probabilities)
  98.  
  99.         normalizedResults = []
  100.  
  101.         for result in results:
  102.             factor = 1.0 / sum(result.itervalues())
  103.             normalizedResults.append({k: v * factor for k, v in result.iteritems()})
  104.  
  105.         return np.array(normalizedResults)
  106.  
  107.     def predict(self, X):
  108.         return np.array([max(probability, key=probability.get) for probability in self.predict_proba(X)])
  109.  
  110.     def gaussian(self, mean, std, x):
  111.         ePart = math.pow(math.e, -(x - mean) * 2 / (2 * std * 2))
  112.  
  113.         return (1.0 / (math.sqrt(2 * math.pi) * std)) * ePart
  114.  
  115.  
  116. class NaiveBayesNumNom(BaseEstimator):
  117.     def __init__(self, is_cat=None, m=0.0):
  118.         raise NotImplementedError
  119.  
  120.     def fit(self, X, yy):
  121.         raise NotImplementedError
  122.  
  123.     def predict_proba(self, X):
  124.         raise NotImplementedError
  125.  
  126.     def predict(self, X):
  127.         raise NotImplementedError
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement