Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import sys
- #---------------------------------------------------------------
def metrics(tp, fp, tn, fn, pos, neg):
    """Compute one-vs-rest evaluation metrics from confusion counts.

    Args:
        tp: number of true positives.
        fp: number of false positives.
        tn: number of true negatives.
        fn: number of false negatives.
        pos: number of samples that really belong to the class.
        neg: number of samples that really belong to another class.

    Returns:
        A tuple ``(tpr, fpr, errorRate, accuracy, precision)`` of floats.
        Any ratio whose denominator is zero (for example precision when the
        class was never predicted) is reported as 0.0 instead of raising
        ZeroDivisionError.
    """
    def _ratio(numerator, denominator):
        # Guard every division: empty classes or a never-predicted class
        # would otherwise abort the whole evaluation.
        if not denominator:
            return 0.0
        return float(numerator) / denominator

    total = pos + neg
    tpr = _ratio(tp, pos)
    fpr = _ratio(fp, neg)
    errorRate = _ratio(fp + fn, total)
    accuracy = _ratio(tp + tn, total)
    precision = _ratio(tp, tp + fp)
    return tpr, fpr, errorRate, accuracy, precision
def readFile(filename):
    """Read a data file made of one header line followed by numeric rows.

    The first line is split on whitespace and converted to integers; the
    remaining lines are parsed by ``np.genfromtxt``.

    Args:
        filename: path of the file to read.

    Returns:
        A tuple ``(data, specs)`` where ``data`` is the array produced by
        ``np.genfromtxt`` and ``specs`` is the list of header integers.
    """
    # The context manager closes the handle even if parsing raises.
    with open(filename, 'r') as handle:
        header = handle.readline()
        # Build a real list so callers can index or re-iterate it on both
        # Python 2 and Python 3 (where map() would be a one-shot iterator).
        specs = [int(token) for token in header.split()]
        # genfromtxt continues from the current position, i.e. after the header.
        data = np.genfromtxt(handle)
    return (data, specs)
- #--------------------------------------------------------------
- # training
def makeCentroids(data, nA, nB, nC):
    """Return the centroids of three consecutive row blocks of ``data``.

    ``nA``, ``nB`` and ``nC`` are the end offsets of the blocks: the blocks
    are ``data[:nA]``, ``data[nA:nB]`` and ``data[nB:nC]``. Each centroid is
    the mean of its block taken along axis 0.

    Returns:
        A tuple ``(centroidA, centroidB, centroidC)``.
    """
    # Consecutive (start, stop) pairs delimiting the three blocks.
    bounds = ((0, nA), (nA, nB), (nB, nC))
    return tuple(np.mean(data[start:stop], axis=0) for start, stop in bounds)
def findDecisionFunction(centroidA, centroidB):
    """Build a two-class classifier from a pair of centroids.

    The decision value of a point is the dot product of the centroid
    difference (``centroidB - centroidA``) with the point's offset from the
    midpoint of the two centroids. The returned callable normalises that
    value so that points on ``centroidA``'s side give +1 and points on the
    other side give -1; a decision value of exactly 0 is reported as +1.

    Returns:
        A function ``classAorB(point_nd)`` closing over the values above.
    """
    midpoint = np.divide(np.add(centroidB, centroidA), 2)
    normal = np.subtract(centroidB, centroidA)

    # Orientation of the decision value at centroidA itself; multiplying by
    # it makes centroidA's side the positive one.
    reference = decision_function(normal, centroidA, midpoint)
    orientation = reference / abs(reference)

    def classAorB(point_nd):
        score = decision_function(normal, point_nd, midpoint)
        # Ties (score == 0) are counted as the positive class.
        oriented = 1 if score == 0 else score * orientation
        return oriented / abs(oriented)

    return classAorB
def decision_function(line, point_nd, midpoint):
    """Return the dot product of ``line`` with ``point_nd - midpoint``."""
    offset = np.subtract(point_nd, midpoint)
    return np.dot(line, offset)
def _predict_label(point, classAOrB, classAOrC, classBOrC):
    """Return 'A', 'B' or 'C' for ``point`` using the pairwise classifiers.

    Each classifier returns 1 when the point falls on the side of the first
    class in its name. A is compared with B first; the winner is then
    compared with C.
    """
    if classAOrB(point) == 1:
        return 'A' if classAOrC(point) == 1 else 'C'
    return 'B' if classBOrC(point) == 1 else 'C'


def _tally(actual, predicted, keys, truePositives, trueNegatives,
           falsePositives, falseNegatives):
    """Update the one-vs-rest confusion counts of every class for one sample."""
    for key in keys:
        if key == actual:
            if key == predicted:
                truePositives[key] += 1
            else:
                falseNegatives[key] += 1
        elif key == predicted:
            falsePositives[key] += 1
        else:
            trueNegatives[key] += 1


def main(argv):
    """Train on ``argv[1]``, evaluate on ``argv[2]`` and print averaged metrics.

    Both files start with a header line of four integers (dimension and the
    sizes of classes A, B and C) followed by one point per line, grouped by
    class in the order A, B, C.

    Exits with status 1 when the number of arguments is wrong.
    """
    if len(argv) != 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Received wrong number of arguments.\nUsage: \'python triclassify.py train.txt test.txt\'")
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    training_file = argv[1]
    test_file_name = argv[2]

    # Training: one centroid per class, one decision function per class pair.
    data, (_dim, a, b, c) = readFile(training_file)
    (centroidA, centroidB, centroidC) = makeCentroids(data, a, a + b, a + b + c)
    classAOrB = findDecisionFunction(centroidA, centroidB)
    classAOrC = findDecisionFunction(centroidA, centroidC)
    classBOrC = findDecisionFunction(centroidB, centroidC)

    keys = ['A', 'B', 'C']
    truePositives = {key: 0 for key in keys}
    trueNegatives = {key: 0 for key in keys}
    falsePositives = {key: 0 for key in keys}
    falseNegatives = {key: 0 for key in keys}

    # The context manager closes the test file even if a line fails to parse.
    with open(test_file_name, 'r') as test_file:
        header = test_file.readline()
        (_dim, numA, numB, numC) = [int(token) for token in header.split()]

        # Points are stored class by class, so the true label of each line
        # is given by its position in the file.
        for actual, count in zip(keys, (numA, numB, numC)):
            for _ in range(count):
                line = test_file.readline()
                # A concrete list, not a lazy map object, so numpy can use it.
                point = [float(token) for token in line.split()]
                predicted = _predict_label(point, classAOrB, classAOrC, classBOrC)
                _tally(actual, predicted, keys, truePositives, trueNegatives,
                       falsePositives, falseNegatives)

    # Calculate per-class metrics, then average them over the three classes.
    sizes = {'A': numA, 'B': numB, 'C': numC}
    total = numA + numB + numC
    per_class = [
        metrics(truePositives[key], falsePositives[key],
                trueNegatives[key], falseNegatives[key],
                sizes[key], total - sizes[key])
        for key in keys
    ]
    (truePositiveRate, falsePositiveRate, errorRate, accuracy, precision) = [
        sum(column) / 3.0 for column in zip(*per_class)
    ]

    print("True positive rate = %f" % truePositiveRate)
    print("False positive rate = %f" % falsePositiveRate)
    print("Error rate = %f" % errorRate)
    print("Accuracy = %f" % accuracy)
    print("Precision = %f" % precision)


if __name__ == "__main__":
    main(sys.argv)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement