import util
import math


class NaiveBayesClassifier(object):
    """
    See the project description for the specifications of the Naive Bayes classifier.

    Note that the variable 'datum' in this code refers to a counter of features
    (not to a raw samples.Datum).
    """

    def __init__(self, legalLabels, smoothing=0, logTransform=False, featureValues=None):
        self.legalLabels = legalLabels
        self.type = "naivebayes"
        self.k = int(smoothing)  # this is the smoothing parameter, ** use it in your train method **
        self.logTransform = logTransform
        # featureValues maps each feature to the collection of values it can
        # take; it stays empty if there is no smoothing.  A util.Counter()
        # default argument would be mutable state shared across instances, so
        # a fresh counter is created per instance instead.
        self.featureValues = featureValues if featureValues is not None else util.Counter()

    def fit(self, trainingData, trainingLabels):
        """
        Trains the classifier by collecting counts over the training data and
        stores the Laplace-smoothed estimates so that they can be used to classify.

        trainingData is a list of feature dictionaries.  trainingLabels is the
        corresponding list of correct labels, one per instance.

        To get the list of all possible features or labels, use self.features
        and self.legalLabels.
        """

        self.features = trainingData[0].keys()  # the names of the features in the dataset

        self.prior = util.Counter()  # probability over labels
        self.conditionalProb = util.Counter()  # conditional probability of a feature having
                                               # value v for a given class; indexed by
                                               # (label, feat, value) to match the code below

        # Construct (and store) the normalized, smoothed priors and
        # conditional probabilities.

        # Debug output (disabled): dump the labels and the raw training data.
        # for i in range(len(trainingLabels)):
        #     print("the label is:", trainingLabels[i])
        # print(trainingLabels)
        # print(trainingData)

        datasetLength = len(trainingLabels)

        # Count how often each label occurs (raw counts; normalized below).
        for i in range(datasetLength):
            self.prior[trainingLabels[i]] += 1.0
        # for label in self.prior.keys():
        #     print(label, ":", self.prior[label])

        # Count each observed (label, feature, value) combination.
        for i in range(datasetLength):
            for feat in self.features:
                value = trainingData[i][feat]
                self.conditionalProb[(trainingLabels[i], feat, value)] += 1.0
        # for label, feat, value in self.conditionalProb.keys():
        #     print(label, feat, value, self.conditionalProb[(label, feat, value)])

        # Now that the occurrence counts of all combinations are stored,
        # compute the probabilities.
        # When smoothing, first give every possible (label, feature, value)
        # combination an entry, not only the combinations observed in
        # training; otherwise unseen combinations keep probability zero and
        # break the log transform at prediction time.
        if self.k > 0:
            for label in self.legalLabels:
                for feat in self.features:
                    for value in self.featureValues[feat]:
                        self.conditionalProb[(label, feat, value)] += 0.0

        for label, feat, value in self.conditionalProb.keys():
            Nuvjetno = self.conditionalProb[(label, feat, value)]  # conditional count
            Nukupno = self.prior[label]  # total count for this label (still raw here)

            if self.k > 0:
                Nuvjetno += self.k
                Nukupno += len(self.featureValues[feat]) * self.k

            self.conditionalProb[(label, feat, value)] = Nuvjetno / float(Nukupno)

        for l in self.legalLabels:
            self.prior[l] = self.prior[l] / float(datasetLength)

        "*** YOUR CODE HERE ***"
        # Alternative implementation, kept for reference:
        # values = set()
        #
        # # count prior and conditional probs
        # for i, currentLabel in enumerate(trainingLabels):
        #     self.prior[currentLabel] += 1.
        #     for ftr in self.features:
        #         values.add(trainingData[i][ftr])
        #         self.conditionalProb[(ftr, currentLabel, trainingData[i][ftr])] += 1.
        #
        # # smoothing
        # for label in self.legalLabels:
        #     for feat in self.features:
        #         for val in values:
        #             self.conditionalProb[(feat, label, val)] += self.k
        #
        # # normalize prior
        # self.prior.normalize()
        #
        # counterPerLabFeat = util.Counter()
        # for label in self.legalLabels:
        #     for feat in self.features:
        #         for val in values:
        #             counterPerLabFeat[(feat, label)] += self.conditionalProb[(feat, label, val)]
        # for key in self.conditionalProb.keys():
        #     self.conditionalProb[key] /= counterPerLabFeat[(key[0], key[1])]

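    # Worked example of the smoothed estimate computed in fit() above, with
    # assumed toy counts (illustrative, not from the original data): if label
    # 'A' occurs 10 times, feature 'f' equals 1 in 3 of those instances, 'f'
    # can take 2 values, and k = 1, then
    #
    #     P(f = 1 | 'A') = (3 + 1) / (10 + 1 * 2) = 4 / 12 = 1/3
    #
    # whereas the unsmoothed estimate (k = 0) would be 3 / 10.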

    def predict(self, testData):
        """
        Classify the data based on the posterior distribution over labels.

        You shouldn't modify this method.
        """

        guesses = []
        self.posteriors = []  # posterior probabilities are stored for later data analysis

        for instance in testData:
            if self.logTransform:
                posterior = self.calculateLogJointProbabilities(instance)
            else:
                posterior = self.calculateJointProbabilities(instance)
            guesses.append(posterior.argMax())
            self.posteriors.append(posterior)
        return guesses

    def calculateJointProbabilities(self, instance):
        """
        Returns the joint distribution over legal labels and the instance.
        Each probability should be stored in the joint counter, e.g.
        Joint[3] = <Estimate of ( P(Label = 3, instance) )>

        To get the list of all possible features or labels, use self.features
        and self.legalLabels.
        """
        joint = util.Counter()

        for label in self.legalLabels:
            # calculate the joint probabilities for each class
            "*** YOUR CODE HERE ***"
            # P(label, instance) = P(label) * prod over features of P(f = instance[f] | label)
            joint[label] = self.prior[label]
            for feature in self.features:
                pVal = self.conditionalProb[(label, feature, instance[feature])]
                joint[label] *= pVal
        return joint

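    # Why the log-transformed version below exists: multiplying many
    # per-feature probabilities (hundreds of features is typical, e.g. one
    # per pixel for digit images) quickly underflows double precision, so
    # every joint probability becomes 0.0 and argMax is meaningless.  A rough
    # illustration with assumed numbers: 0.5 ** 784 is about 1e-236, while
    # 0.1 ** 784 underflows to exactly 0.0.  Summing logs avoids this.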

    def calculateLogJointProbabilities(self, instance):
        """
        Returns the log-joint distribution over legal labels and the instance.
        Each log-probability should be stored in the log-joint counter, e.g.
        logJoint[3] = <Estimate of log( P(Label = 3, instance) )>

        To get the list of all possible features or labels, use self.features
        and self.legalLabels.
        """
        logJoint = util.Counter()

        for label in self.legalLabels:
            # calculate the log-joint probabilities for each class
            "*** YOUR CODE HERE ***"
            # log P(label, instance) = log P(label) + sum over features of log P(f = instance[f] | label)
            logJoint[label] = math.log(self.prior[label])
            for feature in self.features:
                pVal = self.conditionalProb[(label, feature, instance[feature])]
                logJoint[label] += math.log(pVal)
        return logJoint
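

# Minimal usage sketch with assumed toy data (the dataset, feature name 'f',
# and labels below are illustrative, not from the original project).  Assumes
# util.Counter behaves like the Berkeley AI projects' counter: a dict that
# defaults to 0 and provides argMax().
if __name__ == "__main__":
    trainingData = [{"f": 1}, {"f": 1}, {"f": 0}, {"f": 0}]
    trainingLabels = ["spam", "spam", "ham", "ham"]

    # With smoothing enabled, featureValues must map each feature to the
    # collection of values it can take.
    classifier = NaiveBayesClassifier(
        legalLabels=["spam", "ham"],
        smoothing=1,
        logTransform=True,
        featureValues={"f": {0, 1}},
    )
    classifier.fit(trainingData, trainingLabels)
    print(classifier.predict([{"f": 1}, {"f": 0}]))  # expected: ['spam', 'ham']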