• API
• FAQ
• Tools
• Trends
• Archive
daily pastebin goal
81%
SHARE
TWEET

# help

a guest Jun 19th, 2017 46 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. # -*- coding: utf-8 -*-
2. """
3. Created on Mon Jun 19 23:05:29 2017
4.
5. @author: Ilcho
6. """
7. # Example of Naive Bayes implemented from Scratch in Python
8. import csv
9. import random
10. import math
11.
14.     dataset = list(lines)
15.     for i in range(len(dataset)):
16.         dataset[i] = [float(x) for x in dataset[i]]
17.     return dataset
18.
def splitDataset(dataset, splitRatio):
    """Randomly partition *dataset* into a training part and a remainder.

    ``int(len(dataset) * splitRatio)`` rows are drawn without replacement
    (one ``random.randrange`` call per drawn row) into the training list.
    The input list itself is not modified; a shallow copy is consumed.

    Returns a two-element list ``[trainSet, remainingRows]``.
    """
    targetSize = int(len(dataset) * splitRatio)
    remaining = list(dataset)
    trainSet = []
    # One draw per training row; randrange raises ValueError if the pool
    # runs dry (splitRatio > 1), exactly as an unbounded draw loop would.
    for _ in range(targetSize):
        pick = random.randrange(len(remaining))
        trainSet.append(remaining.pop(pick))
    return [trainSet, remaining]
27.
def separateByClass(dataset):
    """Group the rows of *dataset* by their last element.

    Returns a dict mapping each distinct last-column value to the list of
    rows (in original order) that end with that value.
    """
    grouped = {}
    for row in dataset:
        label = row[-1]
        grouped.setdefault(label, []).append(row)
    return grouped
36.
def mean(numbers):
    """Return the arithmetic mean of *numbers* as a float.

    Raises ZeroDivisionError when *numbers* is empty.
    """
    total = sum(numbers)
    count = float(len(numbers))
    return total / count
39.
def stdev(numbers):
    """Return the sample standard deviation of *numbers*.

    The squared deviations from the mean are divided by ``len(numbers) - 1``,
    so a single-element input raises ZeroDivisionError.
    """
    centre = mean(numbers)
    squaredDeviations = [pow(value - centre, 2) for value in numbers]
    degreesOfFreedom = float(len(numbers) - 1)
    return math.sqrt(sum(squaredDeviations) / degreesOfFreedom)
44.
def summarize(dataset):
    """Return a ``(mean, stdev)`` pair for every column except the last.

    Columns are obtained by transposing the rows of *dataset*; statistics
    are computed for all of them and the entry for the final column is then
    dropped. An empty *dataset* raises IndexError.
    """
    columnStats = []
    for column in zip(*dataset):
        columnStats.append((mean(column), stdev(column)))
    # Discard the statistics of the trailing column.
    columnStats.pop()
    return columnStats
49.
def summarizeByClass(dataset):
    """Compute per-class column statistics for *dataset*.

    Rows are grouped by their last element via ``separateByClass`` and each
    group is passed to ``summarize``.

    Returns a dict mapping each class value to the list of ``(mean, stdev)``
    pairs that ``summarize`` produces for that class's rows.
    """
    separated = separateByClass(dataset)
    summaries = {}
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for classValue, instances in separated.items():
        summaries[classValue] = summarize(instances)
    return summaries
56.
def calculateProbability(x, mean, stdev):
    """Evaluate the Gaussian density with the given *mean* and *stdev* at *x*.

    Computes ``exp(-(x - mean)^2 / (2 * stdev^2)) / (sqrt(2 * pi) * stdev)``.
    A *stdev* of zero raises ZeroDivisionError.
    """
    squaredDistance = math.pow(x - mean, 2)
    spread = 2 * math.pow(stdev, 2)
    exponent = math.exp(-(squaredDistance / spread))
    normalizer = math.sqrt(2 * math.pi) * stdev
    return (1 / normalizer) * exponent
60.
def calculateClassProbabilities(summaries, inputVector):
    """Multiply the per-attribute Gaussian densities for each class.

    *summaries* maps a class value to a list of ``(mean, stdev)`` pairs; the
    i-th pair is evaluated against ``inputVector[i]`` with
    ``calculateProbability``. Elements of *inputVector* beyond the number of
    pairs are ignored; a vector shorter than the pair list raises IndexError.

    Returns a dict mapping each class value to the product of its densities
    (``1`` for a class with no attribute summaries).
    """
    probabilities = {}
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for classValue, classSummaries in summaries.items():
        probabilities[classValue] = 1
        # Locals are named so they do not shadow the mean()/stdev() functions.
        for i, (attrMean, attrStdev) in enumerate(classSummaries):
            x = inputVector[i]
            probabilities[classValue] *= calculateProbability(x, attrMean, attrStdev)
    return probabilities
70.
def predict(summaries, inputVector):
    """Return the class value with the highest computed probability.

    Probabilities come from ``calculateClassProbabilities``. On ties the
    class encountered first during iteration wins, because only a strictly
    greater probability replaces the current best. Returns ``None`` when
    *summaries* contains no classes.
    """
    probabilities = calculateClassProbabilities(summaries, inputVector)
    bestLabel, bestProb = None, -1
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for classValue, probability in probabilities.items():
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classValue
    return bestLabel
79.
def getPredictions(summaries, testSet):
    """Return the list of ``predict(summaries, row)`` results for each row
    of *testSet*, in order."""
    return [predict(summaries, row) for row in testSet]
86.
def getAccuracy(testSet, predictions):
    """Return the percentage of rows whose last element equals its prediction.

    ``predictions[i]`` is compared with ``testSet[i][-1]`` for every row of
    *testSet*. An empty *testSet* yields ``0.0`` rather than dividing by
    zero. A *predictions* list shorter than *testSet* raises IndexError.

    Returns a float in the range 0.0 to 100.0.
    """
    if not testSet:
        # No rows to score: avoid ZeroDivisionError and report 0% accuracy.
        return 0.0
    correct = 0
    for i, row in enumerate(testSet):
        if row[-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testSet))) * 100.0
93.
94. def main():
95.     filename = 'trainInputCopy.csv'
96.     splitRatio = 0.67