Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Enrico Costanzo (201124039)
- import os
- from math import pow, sqrt, fabs
- from numpy import std, mean
def clear():
    """Clear the terminal screen.

    Runs ``cls`` on Windows (``os.name == 'nt'``) and ``clear`` on every
    other platform, where ``cls`` does not exist.

    Returns the exit status reported by ``os.system``.
    """
    return os.system('cls' if os.name == 'nt' else 'clear')
class Point:
    """A labelled 2-D sample: coordinates ``x``/``y`` and class label ``c``."""

    def __init__(self, x, y, c):
        # c is the class label; this file uses 1 and -1.
        self.x, self.y, self.c = x, y, c
class ResultObject:
    """Per-class summary statistics for the two attributes x and y.

    Fields ending in ``class0`` and ``class1`` hold the values for the two
    classes; each class has a mean and a standard deviation per attribute.
    """

    def __init__(self,
                 mean_x_class0, mean_y_class0, mean_x_class1, mean_y_class1,
                 stddev_x_class0, stddev_y_class0, stddev_x_class1, stddev_y_class1):
        # Means, grouped per class.
        self.mean_x_class0, self.mean_y_class0 = mean_x_class0, mean_y_class0
        self.mean_x_class1, self.mean_y_class1 = mean_x_class1, mean_y_class1
        # Standard deviations, grouped per class.
        self.stddev_x_class0, self.stddev_y_class0 = stddev_x_class0, stddev_y_class0
        self.stddev_x_class1, self.stddev_y_class1 = stddev_x_class1, stddev_y_class1
def die(error_message):
    """Abort by raising a plain ``Exception`` carrying *error_message*."""
    failure = Exception(error_message)
    raise failure
def read_data(filename):
    """Read the numeric rows from the data section of an ARFF file.

    Everything up to and including the line starting with ``@DATA`` is
    skipped; the marker is matched case-insensitively. After it, every
    comma-separated line is converted to a list of floats. Blank lines and
    ``%`` comment lines in the data section are ignored, and rows that do
    not have exactly three values are dropped.

    filename: path of the ARFF file to read.

    Returns a list of ``[x, y, label]`` float lists (empty when the file has
    no data section).

    Raises ValueError when a data field cannot be parsed as a float, and
    whatever ``open`` raises when the file cannot be read.
    """
    rows = []
    in_data_section = False
    # The context manager closes the file even when float() raises.
    with open(filename) as arff:
        for line in arff:
            line = line.strip()
            if not in_data_section:
                if line.upper().startswith('@DATA'):
                    in_data_section = True
                continue
            # A trailing newline or a comment must not reach float().
            if not line or line.startswith('%'):
                continue
            values = [float(field) for field in line.split(',')]
            if len(values) == 3:
                rows.append(values)
    return rows
def xmean(data, axis):  # axis: 0=x 1=y
    """Return the mean of one coordinate over all points in *data*.

    data: iterable of objects exposing ``x`` and ``y`` attributes.
    axis: 0 selects ``x``; any other value selects ``y``.

    Returns the result of ``numpy.mean`` over the selected values.
    """
    values = [d.x if axis == 0 else d.y for d in data]
    return mean(values)
def stddev(data, axis):  # axis: 0=x 1=y
    """Return the standard deviation of one coordinate over *data*.

    data: iterable of objects exposing ``x`` and ``y`` attributes.
    axis: 0 selects ``x``; any other value selects ``y``.

    Returns the result of ``numpy.std`` over the selected values, called
    with its default arguments.
    """
    values = [d.x if axis == 0 else d.y for d in data]
    return std(values)
def train(data):
    """Compute per-class mean and standard deviation of x and y.

    data: iterable of rows indexable as ``[x, y, label]``. Rows whose label
    is greater than 0 form one group; all other rows form the second group.

    Returns a ResultObject in which the ``*_class0`` fields describe the
    label > 0 group and the ``*_class1`` fields describe the other group.
    """
    positive = []  # label > 0, stored in the *_class0 fields
    negative = []  # every other label, stored in the *_class1 fields
    for row in data:
        point = Point(row[0], row[1], row[2])
        if point.c > 0:
            positive.append(point)
        else:
            negative.append(point)

    return ResultObject(
        # Means: x then y, label > 0 group first.
        xmean(positive, 0), xmean(positive, 1),
        xmean(negative, 0), xmean(negative, 1),
        # Standard deviations, same ordering.
        stddev(positive, 0), stddev(positive, 1),
        stddev(negative, 0), stddev(negative, 1),
    )
def test(p, r):
    """Classify *p* by its nearest class mean and check the prediction.

    p: object with coordinates ``x``, ``y`` and true label ``c``.
    r: object with ``mean_x_class0``/``mean_y_class0`` (mean of the class
       predicted as 1) and ``mean_x_class1``/``mean_y_class1`` (mean of the
       class predicted as -1).

    Returns True when the predicted label equals ``p.c``.

    An earlier version also derived a second, per-axis prediction, but it
    was discarded whenever it disagreed with the distance-based one, so the
    result was always the distance-based prediction. Only that prediction
    is computed here; the returned values are unchanged.
    """
    # Euclidean distance from p to each class mean.
    dist_negative = sqrt(pow(p.x - r.mean_x_class1, 2) + pow(p.y - r.mean_y_class1, 2))
    dist_positive = sqrt(pow(p.x - r.mean_x_class0, 2) + pow(p.y - r.mean_y_class0, 2))
    # A tie is resolved in favour of label -1.
    prediction = -1 if dist_negative <= dist_positive else 1
    return prediction == p.c


# The name matches pytest's default "test*" collection pattern; this is a
# classifier helper, not a test case, so opt it out of collection.
test.__test__ = False
def print_results(r):
    """Clear the screen and print the per-class statistics held in *r*.

    r: object with the ``mean_*`` and ``stddev_*`` fields for both classes.
       The ``*_class1`` values are printed first under the heading
       "Class -1", then the ``*_class0`` values under "Class 1".

    Output goes through single-argument ``print(...)`` calls so the function
    runs on both Python 2 and Python 3 with the same text.
    """
    clear()
    sections = (
        ("Class -1",
         r.mean_x_class1, r.stddev_x_class1,
         r.mean_y_class1, r.stddev_y_class1),
        ("Class 1",
         r.mean_x_class0, r.stddev_x_class0,
         r.mean_y_class0, r.stddev_y_class0),
    )
    lines = []
    for index, (title, mean_x, stddev_x, mean_y, stddev_y) in enumerate(sections):
        if index:
            # One extra blank line separates consecutive classes.
            lines.append("")
        lines.extend([
            "",
            title,
            "",
            "Attribute X",
            # Two spaces after the colon: the label's own trailing space plus
            # the separator the Python 2 print statement inserted.
            "Mean:  " + str(mean_x),
            "StdDev:  " + str(stddev_x),
            "",
            "Attribute Y",
            "Mean:  " + str(mean_y),
            "StdDev:  " + str(stddev_y),
        ])
    print("\n".join(lines))
def eval_data(data, r):
    """Classify every row of *data* and count the misclassifications.

    data: iterable of rows indexable as ``[x, y, label]``.
    r: trained statistics object passed through to ``test``.

    Returns a tuple ``(error_count, num_samples)``.
    """
    outcomes = [test(Point(row[0], row[1], row[2]), r) for row in data]
    misses = sum(1 for correct in outcomes if not correct)
    return misses, len(outcomes)
if __name__ == "__main__":
    # Fit the per-class statistics on the training set and show them.
    train_data = read_data("train.arff")
    train_results = train(train_data)
    print_results(train_results)

    # Score the evaluation set against those statistics.
    test_data = read_data("eval.arff")
    error_absolute, num_samples = eval_data(test_data, train_results)
    if num_samples == 0:
        # Avoid a ZeroDivisionError with no explanation.
        raise SystemExit("eval.arff contains no usable samples")

    # 100.0 forces true division; with integer operands Python 2 would
    # truncate the percentage to a whole number.
    error_quota = (100.0 * error_absolute) / num_samples

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("")
    print("Correctly Classified Instances\t\t%d \t%.2f %%"
          % (num_samples - error_absolute, 100 - error_quota))
    print("Incorrectly Classified Instances\t%d \t%.2f %%"
          % (error_absolute, error_quota))
    print("Number of samples\t\t\t%d" % num_samples)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement