SHARE
TWEET

help

a guest Jun 19th, 2017 43 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. # -*- coding: utf-8 -*-
  2. """
  3. Created on Mon Jun 19 23:05:29 2017
  4.  
  5. @author: Ilcho
  6. """
  7. # Example of Naive Bayes implemented from Scratch in Python
  8. import csv
  9. import random
  10. import math
  11.  
  12. def loadCsv(filename):
  13.     lines = csv.reader(open(filename, "rb"))
  14.     dataset = list(lines)
  15.     for i in range(len(dataset)):
  16.         dataset[i] = [float(x) for x in dataset[i]]
  17.     return dataset
  18.  
  19. def splitDataset(dataset, splitRatio):
  20.     trainSize = int(len(dataset) * splitRatio)
  21.     trainSet = []
  22.     copy = list(dataset)
  23.     while len(trainSet) < trainSize:
  24.         index = random.randrange(len(copy))
  25.         trainSet.append(copy.pop(index))
  26.     return [trainSet, copy]
  27.  
  28. def separateByClass(dataset):
  29.     separated = {}
  30.     for i in range(len(dataset)):
  31.         vector = dataset[i]
  32.         if (vector[-1] not in separated):
  33.             separated[vector[-1]] = []
  34.         separated[vector[-1]].append(vector)
  35.     return separated
  36.  
  37. def mean(numbers):
  38.     return sum(numbers)/float(len(numbers))
  39.  
  40. def stdev(numbers):
  41.     avg = mean(numbers)
  42.     variance = sum([pow(x-avg,2) for x in numbers])/float(len(numbers)-1)
  43.     return math.sqrt(variance)
  44.  
  45. def summarize(dataset):
  46.     summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
  47.     del summaries[-1]
  48.     return summaries
  49.  
  50. def summarizeByClass(dataset):
  51.     separated = separateByClass(dataset)
  52.     summaries = {}
  53.     for classValue, instances in separated.iteritems():
  54.         summaries[classValue] = summarize(instances)
  55.     return summaries
  56.  
  57. def calculateProbability(x, mean, stdev):
  58.     exponent = math.exp(-(math.pow(x-mean,2)/(2*math.pow(stdev,2))))
  59.     return (1 / (math.sqrt(2*math.pi) * stdev)) * exponent
  60.  
  61. def calculateClassProbabilities(summaries, inputVector):
  62.     probabilities = {}
  63.     for classValue, classSummaries in summaries.iteritems():
  64.         probabilities[classValue] = 1
  65.         for i in range(len(classSummaries)):
  66.             mean, stdev = classSummaries[i]
  67.             x = inputVector[i]
  68.             probabilities[classValue] *= calculateProbability(x, mean, stdev)
  69.     return probabilities
  70.            
  71. def predict(summaries, inputVector):
  72.     probabilities = calculateClassProbabilities(summaries, inputVector)
  73.     bestLabel, bestProb = None, -1
  74.     for classValue, probability in probabilities.iteritems():
  75.         if bestLabel is None or probability > bestProb:
  76.             bestProb = probability
  77.             bestLabel = classValue
  78.     return bestLabel
  79.  
  80. def getPredictions(summaries, testSet):
  81.     predictions = []
  82.     for i in range(len(testSet)):
  83.         result = predict(summaries, testSet[i])
  84.         predictions.append(result)
  85.     return predictions
  86.  
  87. def getAccuracy(testSet, predictions):
  88.     correct = 0
  89.     for i in range(len(testSet)):
  90.         if testSet[i][-1] == predictions[i]:
  91.             correct += 1
  92.     return (correct/float(len(testSet))) * 100.0
  93.  
  94. def main():
  95.     filename = 'trainInputCopy.csv'
  96.     splitRatio = 0.67
  97.     dataset = loadCsv(filename)
  98.     trainingSet, testSet = splitDataset(dataset, splitRatio)
  99.     print('Split {0} rows into train={1} and test={2} rows').format(len(dataset), len(trainingSet), len(testSet))
  100.     # prepare model
  101.     summaries = summarizeByClass(trainingSet)
  102.     # test model
  103.     predictions = getPredictions(summaries, testSet)
  104.     accuracy = getAccuracy(testSet, predictions)
  105.     print('Accuracy: {0}%').format(accuracy)
  106.  
  107. main()
RAW Paste Data
Pastebin PRO Summer Special!
Get 40% OFF on Pastebin PRO accounts!
Top