Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from __future__ import print_function
# Labelled training samples used to grow the decision trees.
# Every row holds four numeric measurements followed by the class label
# (the species name) in the last position.
# NOTE(review): looks like the classic Iris data set, so the four columns
# are presumably sepal length/width and petal length/width -- confirm
# against the data source.
trainingData = [
    [6.3, 2.9, 5.6, 1.8, 'I. virginica'],
    [6.5, 3.0, 5.8, 2.2, 'I. virginica'],
    [7.6, 3.0, 6.6, 2.1, 'I. virginica'],
    [4.9, 2.5, 4.5, 1.7, 'I. virginica'],
    [7.3, 2.9, 6.3, 1.8, 'I. virginica'],
    [6.7, 2.5, 5.8, 1.8, 'I. virginica'],
    [7.2, 3.6, 6.1, 2.5, 'I. virginica'],
    [6.5, 3.2, 5.1, 2.0, 'I. virginica'],
    [6.4, 2.7, 5.3, 1.9, 'I. virginica'],
    [6.8, 3.0, 5.5, 2.1, 'I. virginica'],
    [5.7, 2.5, 5.0, 2.0, 'I. virginica'],
    [5.8, 2.8, 5.1, 2.4, 'I. virginica'],
    [6.4, 3.2, 5.3, 2.3, 'I. virginica'],
    [6.5, 3.0, 5.5, 1.8, 'I. virginica'],
    [7.7, 3.8, 6.7, 2.2, 'I. virginica'],
    [7.7, 2.6, 6.9, 2.3, 'I. virginica'],
    [6.0, 2.2, 5.0, 1.5, 'I. virginica'],
    [6.9, 3.2, 5.7, 2.3, 'I. virginica'],
    [5.6, 2.8, 4.9, 2.0, 'I. virginica'],
    [7.7, 2.8, 6.7, 2.0, 'I. virginica'],
    [6.3, 2.7, 4.9, 1.8, 'I. virginica'],
    [6.7, 3.3, 5.7, 2.1, 'I. virginica'],
    [7.2, 3.2, 6.0, 1.8, 'I. virginica'],
    [6.2, 2.8, 4.8, 1.8, 'I. virginica'],
    [6.1, 3.0, 4.9, 1.8, 'I. virginica'],
    [6.4, 2.8, 5.6, 2.1, 'I. virginica'],
    [7.2, 3.0, 5.8, 1.6, 'I. virginica'],
    [7.4, 2.8, 6.1, 1.9, 'I. virginica'],
    [7.9, 3.8, 6.4, 2.0, 'I. virginica'],
    [6.4, 2.8, 5.6, 2.2, 'I. virginica'],
    [6.3, 2.8, 5.1, 1.5, 'I. virginica'],
    [6.1, 2.6, 5.6, 1.4, 'I. virginica'],
    [7.7, 3.0, 6.1, 2.3, 'I. virginica'],
    [6.3, 3.4, 5.6, 2.4, 'I. virginica'],
    [5.1, 3.5, 1.4, 0.2, 'I. setosa'],
    [4.9, 3.0, 1.4, 0.2, 'I. setosa'],
    [4.7, 3.2, 1.3, 0.2, 'I. setosa'],
    [4.6, 3.1, 1.5, 0.2, 'I. setosa'],
    [5.0, 3.6, 1.4, 0.2, 'I. setosa'],
    [5.4, 3.9, 1.7, 0.4, 'I. setosa'],
    [4.6, 3.4, 1.4, 0.3, 'I. setosa'],
    [5.0, 3.4, 1.5, 0.2, 'I. setosa'],
    [4.4, 2.9, 1.4, 0.2, 'I. setosa'],
    [4.9, 3.1, 1.5, 0.1, 'I. setosa'],
    [5.4, 3.7, 1.5, 0.2, 'I. setosa'],
    [4.8, 3.4, 1.6, 0.2, 'I. setosa'],
    [4.8, 3.0, 1.4, 0.1, 'I. setosa'],
    [4.3, 3.0, 1.1, 0.1, 'I. setosa'],
    [5.8, 4.0, 1.2, 0.2, 'I. setosa'],
    [5.7, 4.4, 1.5, 0.4, 'I. setosa'],
    [5.4, 3.9, 1.3, 0.4, 'I. setosa'],
    [5.1, 3.5, 1.4, 0.3, 'I. setosa'],
    [5.7, 3.8, 1.7, 0.3, 'I. setosa'],
    [5.1, 3.8, 1.5, 0.3, 'I. setosa'],
    [5.4, 3.4, 1.7, 0.2, 'I. setosa'],
    [5.1, 3.7, 1.5, 0.4, 'I. setosa'],
    [4.6, 3.6, 1.0, 0.2, 'I. setosa'],
    [5.1, 3.3, 1.7, 0.5, 'I. setosa'],
    [4.8, 3.4, 1.9, 0.2, 'I. setosa'],
    [5.0, 3.0, 1.6, 0.2, 'I. setosa'],
    [5.0, 3.4, 1.6, 0.4, 'I. setosa'],
    [5.2, 3.5, 1.5, 0.2, 'I. setosa'],
    [5.2, 3.4, 1.4, 0.2, 'I. setosa'],
    [5.5, 2.3, 4.0, 1.3, 'I. versicolor'],
    [6.5, 2.8, 4.6, 1.5, 'I. versicolor'],
    [5.7, 2.8, 4.5, 1.3, 'I. versicolor'],
    [6.3, 3.3, 4.7, 1.6, 'I. versicolor'],
    [4.9, 2.4, 3.3, 1.0, 'I. versicolor'],
    [6.6, 2.9, 4.6, 1.3, 'I. versicolor'],
    [5.2, 2.7, 3.9, 1.4, 'I. versicolor'],
    [5.0, 2.0, 3.5, 1.0, 'I. versicolor'],
    [5.9, 3.0, 4.2, 1.5, 'I. versicolor'],
    [6.0, 2.2, 4.0, 1.0, 'I. versicolor'],
    [6.1, 2.9, 4.7, 1.4, 'I. versicolor'],
    [5.6, 2.9, 3.6, 1.3, 'I. versicolor'],
    [6.7, 3.1, 4.4, 1.4, 'I. versicolor'],
    [5.6, 3.0, 4.5, 1.5, 'I. versicolor'],
    [5.8, 2.7, 4.1, 1.0, 'I. versicolor'],
    [6.2, 2.2, 4.5, 1.5, 'I. versicolor'],
    [5.6, 2.5, 3.9, 1.1, 'I. versicolor'],
    [5.9, 3.2, 4.8, 1.8, 'I. versicolor'],
    [6.1, 2.8, 4.0, 1.3, 'I. versicolor'],
    [6.3, 2.5, 4.9, 1.5, 'I. versicolor'],
    [6.1, 2.8, 4.7, 1.2, 'I. versicolor'],
    [6.4, 2.9, 4.3, 1.3, 'I. versicolor'],
    [6.6, 3.0, 4.4, 1.4, 'I. versicolor'],
    [6.8, 2.8, 4.8, 1.4, 'I. versicolor'],
    [6.7, 3.0, 5.0, 1.7, 'I. versicolor'],
    [6.0, 2.9, 4.5, 1.5, 'I. versicolor'],
    [5.7, 2.6, 3.5, 1.0, 'I. versicolor'],
    [5.5, 2.4, 3.8, 1.1, 'I. versicolor'],
    [5.5, 2.4, 3.7, 1.0, 'I. versicolor'],
    [5.8, 2.7, 3.9, 1.2, 'I. versicolor'],
    [6.0, 2.7, 5.1, 1.6, 'I. versicolor'],
    [5.4, 3.0, 4.5, 1.5, 'I. versicolor'],
    [6.0, 3.4, 4.5, 1.6, 'I. versicolor'],
    [6.7, 3.1, 4.7, 1.5, 'I. versicolor'],
    [6.3, 2.3, 4.4, 1.3, 'I. versicolor'],
    [5.6, 3.0, 4.1, 1.3, 'I. versicolor'],
    [5.5, 2.5, 4.0, 1.3, 'I. versicolor'],
    [5.5, 2.6, 4.4, 1.2, 'I. versicolor'],
    [6.1, 3.0, 4.6, 1.4, 'I. versicolor'],
    [5.8, 2.6, 4.0, 1.2, 'I. versicolor'],
    [5.0, 2.3, 3.3, 1.0, 'I. versicolor'],
    [5.6, 2.7, 4.2, 1.3, 'I. versicolor'],
    [5.7, 3.0, 4.2, 1.2, 'I. versicolor'],
    [5.7, 2.9, 4.2, 1.3, 'I. versicolor'],
    [6.2, 2.9, 4.3, 1.3, 'I. versicolor'],
    [5.1, 2.5, 3.0, 1.1, 'I. versicolor'],
    [5.7, 2.8, 4.1, 1.3, 'I. versicolor'],
    [6.4, 3.1, 5.5, 1.8, 'I. virginica'],
    [6.0, 3.0, 4.8, 1.8, 'I. virginica'],
    [6.9, 3.1, 5.4, 2.1, 'I. virginica'],
    [6.7, 3.1, 5.6, 2.4, 'I. virginica'],
    [6.9, 3.1, 5.1, 2.3, 'I. virginica'],
    [5.8, 2.7, 5.1, 1.9, 'I. virginica'],
    [6.8, 3.2, 5.9, 2.3, 'I. virginica'],
    [6.7, 3.3, 5.7, 2.5, 'I. virginica'],
    [6.7, 3.0, 5.2, 2.3, 'I. virginica'],
    [6.3, 2.5, 5.0, 1.9, 'I. virginica'],
    [6.5, 3.0, 5.2, 2.0, 'I. virginica'],
    [6.2, 3.4, 5.4, 2.3, 'I. virginica'],
    [4.7, 3.2, 1.6, 0.2, 'I. setosa'],
    [4.8, 3.1, 1.6, 0.2, 'I. setosa'],
    [5.4, 3.4, 1.5, 0.4, 'I. setosa'],
    [5.2, 4.1, 1.5, 0.1, 'I. setosa'],
    [5.5, 4.2, 1.4, 0.2, 'I. setosa'],
    [4.9, 3.1, 1.5, 0.2, 'I. setosa'],
    [5.0, 3.2, 1.2, 0.2, 'I. setosa'],
    [5.5, 3.5, 1.3, 0.2, 'I. setosa'],
    [4.9, 3.6, 1.4, 0.1, 'I. setosa'],
    [4.4, 3.0, 1.3, 0.2, 'I. setosa'],
    [5.1, 3.4, 1.5, 0.2, 'I. setosa'],
    [5.0, 3.5, 1.3, 0.3, 'I. setosa'],
    [4.5, 2.3, 1.3, 0.3, 'I. setosa'],
    [4.4, 3.2, 1.3, 0.2, 'I. setosa'],
    [5.0, 3.5, 1.6, 0.6, 'I. setosa'],
    [5.1, 3.8, 1.9, 0.4, 'I. setosa'],
    [4.8, 3.0, 1.4, 0.3, 'I. setosa'],
    [5.1, 3.8, 1.6, 0.2, 'I. setosa'],
    [5.9, 3.0, 5.1, 1.8, 'I. virginica'],
]
- # my_data=[line.split('\t') for line in file('decision_tree_example.txt')]
class decisionnode:
    """One node of a binary decision tree.

    Attributes:
        col: index of the column tested at this node (-1 by default).
        value: the value the column is compared against.
        results: class counts stored on a leaf; None on an inner node.
        tb: sub-tree followed when the test is true.
        fb: sub-tree followed when the test is false.
    """

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        # The split criterion (which column, compared against what).
        self.col, self.value = col, value
        # Only leaves carry results; inner nodes carry the two branches.
        self.results = results
        self.tb, self.fb = tb, fb
def sporedi_broj(row, column, value):
    """Numeric split test: is the row's entry in `column` at least `value`?"""
    cell = row[column]
    return cell >= value
def sporedi_string(row, column, value):
    """Nominal split test: does the row's entry in `column` equal `value`?"""
    cell = row[column]
    return cell == value
def divideset(rows, column, value):
    """Split `rows` into two lists by testing one column against `value`.

    When `value` is an int or a float the test is ``row[column] >= value``
    (via `sporedi_broj`); for any other type it is ``row[column] == value``
    (via `sporedi_string`).

    Args:
        rows: iterable of indexable rows.
        column: index of the column to test.
        value: the split value.

    Returns:
        A tuple ``(set_true, set_false)``: the rows that pass the test and
        the rows that fail it, each in their original order.
    """
    # Pick the predicate once, based on the type of the split value.
    if isinstance(value, (int, float)):
        split_function = sporedi_broj
    else:
        split_function = sporedi_string

    set_true = []
    set_false = []
    for row in rows:
        # A single pass is enough: the predicate is evaluated once per row
        # and the row goes straight into the matching bucket.
        if split_function(row, column, value):
            set_true.append(row)
        else:
            set_false.append(row)
    return (set_true, set_false)
- #st, sf = divideset(my_data, 3, 20)
- #print(sf)
- #print(st)
def uniquecounts(rows):
    """Count how often each class label occurs in `rows`.

    The class label is taken from the last column of every row.

    Returns:
        A dict mapping each label to its number of occurrences.
    """
    counts = {}
    for row in rows:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    return counts
- #print(uniquecounts(my_data))
- #print(uniquecounts(st))
- #print(uniquecounts(sf))
def log2(x):
    """Return the base-2 logarithm of `x`, computed as ln(x) / ln(2)."""
    import math
    return math.log(x) / math.log(2)
def entropy(rows):
    """Return the entropy of the class labels found in `rows`.

    This is the negated sum of p * log2(p) over every distinct label, where
    p is the fraction of rows carrying that label. With no rows there are no
    labels to sum over and the result is 0.0.
    """
    total = len(rows)
    score = 0.0
    for count in uniquecounts(rows).values():
        p = float(count) / total
        score -= p * log2(p)
    return score
- #print(entropy(my_data), entropy(st), entropy(sf))
- # exit(0)
def buildtree(rows, scoref=entropy):
    """Recursively grow a binary decision tree from labelled `rows`.

    Every distinct value of every column except the last (the class label)
    is tried as a split. The split with the highest information gain that
    leaves both sides non-empty is kept, and each side is grown the same way.

    Args:
        rows: list of rows whose last element is the class label.
        scoref: impurity function applied to a list of rows.

    Returns:
        A `decisionnode`: an inner node describing the chosen split, a leaf
        holding the class counts when no split improves the score, or an
        empty node when `rows` is empty.
    """
    if len(rows) == 0:
        return decisionnode()

    base_score = scoref(rows)
    total = len(rows)
    feature_count = len(rows[0]) - 1  # the last column is the class label

    top_gain = 0.0
    top_split = None  # (column, value, rows_true, rows_false) of the best split

    for col in range(feature_count):
        # Every distinct value seen in this column is a candidate split.
        candidates = {row[col] for row in rows}
        for value in candidates:
            passed, failed = divideset(rows, col, value)
            # Information gain: score before minus weighted score after.
            p = float(len(passed)) / total
            gain = base_score - p * scoref(passed) - (1 - p) * scoref(failed)
            # Strictly better only, so the first of several equal splits wins.
            if gain > top_gain and len(passed) > 0 and len(failed) > 0:
                top_gain = gain
                top_split = (col, value, passed, failed)

    if top_split is None:
        # Nothing improves on the current score: this becomes a leaf.
        return decisionnode(results=uniquecounts(rows))

    split_col, split_value, passed, failed = top_split
    trueBranch = buildtree(passed, scoref)
    falseBranch = buildtree(failed, scoref)
    return decisionnode(col=split_col, value=split_value,
                        tb=trueBranch, fb=falseBranch)
- #t = buildtree(my_data)
def printtree(tree, level, indent=''):
    """Print `tree` to standard output, one node per line.

    A leaf is printed as the first (in sorted order) label/count pair of its
    results. An inner node is printed as ``col:value? Level= (level)`` and is
    followed by its true branch (prefixed ``T->``) and its false branch
    (prefixed ``F->``), each one level deeper.

    Args:
        tree: the node to print.
        level: depth of `tree`; pass 0 for the root.
        indent: prefix written before the branch markers of this node.
    """
    if tree.results is not None:
        # Leaf: only the first entry of the sorted results is shown.
        label, count = sorted(tree.results.items())[0]
        print(' ' + "{'" + str(label) + "': " + str(count) + '}')
        return

    header = (str(tree.col) + ":" + str(tree.value) + '? '
              + 'Level= ' + '(' + str(level) + ')')
    if level != 0:
        # Non-root nodes follow a branch marker on the same line.
        header = ' ' + header
    print(header)

    child_indent = indent + ' '
    for marker, child in (('T->', tree.tb), ('F->', tree.fb)):
        print(indent + marker, end='')
        printtree(child, level + 1, child_indent)
- #printtree(t)
- # exit(0)
def classify(observation, tree):
    """Walk `tree` with `observation` and return the results of the leaf hit.

    At every inner node the observation's entry in the node's column is
    compared with the node's value: ``>=`` when that entry is an int or a
    float, ``==`` otherwise. A true test follows the true branch, a false
    test the false branch.

    Args:
        observation: indexable sequence of attribute values.
        tree: root node of the decision tree.

    Returns:
        The `results` stored on the leaf that was reached.
    """
    node = tree
    while node.results is None:
        cell = observation[node.col]
        if isinstance(cell, (int, float)):
            goes_true = cell >= node.value
        else:
            goes_true = cell == node.value
        node = node.tb if goes_true else node.fb
    return node.results
- #print(classify(['google', 'MK', 'no', 19, 'Unknown'], t))
- # for test_case in test_cases:
- # print("Nepoznat slucaj:", test_case, " Klasifikacija: ", classify(test_case, t))
if __name__ == "__main__":
    # Read one test case from standard input: four numeric attributes, one
    # per line, followed by the plant type. The attributes are converted to
    # float explicitly so the numeric (>=) comparison is used when the
    # observation is classified, whatever type input() hands back.
    att1 = float(input())
    att2 = float(input())
    att3 = float(input())
    att4 = float(input())
    planttype = input()
    testCase = [att1, att2, att3, att4, planttype]

    # Split the training data at the midpoint so that every sample lands in
    # exactly one half; floor division keeps the index an integer.
    middle = len(trainingData) // 2
    trainingData1 = trainingData[:middle]
    trainingData2 = trainingData[middle:]

    # Grow one tree per half and show both.
    tree1 = buildtree(trainingData1)
    tree2 = buildtree(trainingData2)
    printtree(tree1, 0)
    printtree(tree2, 0)

    # Classify the test case with both trees. Each result maps the class
    # labels of the leaf that was reached to their counts.
    t1 = classify(testCase, tree1)
    t2 = classify(testCase, tree2)

    # Compare the label sets as sorted lists: indexable and independent of
    # dictionary ordering.
    key_t1 = sorted(t1.keys())
    key_t2 = sorted(t2.keys())
    if key_t1 == key_t2:
        print(key_t1[0])
    else:
        print("KONTRADIKCIJA")
Advertisement
Add Comment
Please, Sign In to add comment