Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from math import log
def unique_counts(rows):
    """Tally the class labels found in a dataset.

    The class label is taken from the last column of each row.

    :param rows: dataset
    :type rows: list
    :return: mapping from class label to number of occurrences
    :rtype: dict
    """
    tally = {}
    for row in rows:
        label = row[-1]  # class label lives in the last column
        tally[label] = tally.get(label, 0) + 1
    return tally
def gini_impurity(rows):
    """Probability that a randomly placed item ends up in the
    wrong category (sum of p_i * p_j over all pairs i != j).

    :param rows: dataset
    :type rows: list
    :return: Gini impurity
    :rtype: float
    """
    counts = unique_counts(rows)
    total = len(rows)
    imp = 0
    for label_a, count_a in counts.items():
        p_a = float(count_a) / total
        for label_b, count_b in counts.items():
            if label_a != label_b:
                p_b = float(count_b) / total
                imp += p_a * p_b
    return imp
def entropy(rows):
    """Shannon entropy of the class distribution: -sum(p(x) * log2(p(x)))
    over all distinct class labels in the last column of each row.

    :param rows: dataset
    :type rows: list
    :return: entropy value (0.0 for an empty or single-class dataset)
    :rtype: float
    """
    results = unique_counts(rows)
    total = len(rows)
    # Hoisted loop invariant; the original recomputed log(2) for every label
    # via a lambda assigned to a name (PEP 8 E731).
    log_of_2 = log(2)
    ent = 0.0
    for count in results.values():
        p = float(count) / total
        ent = ent - p * (log(p) / log_of_2)
    return ent
class DecisionNode:
    """One node of a decision tree: either an internal split or a leaf."""

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        """
        :param col: index of the attribute (training-set column) this node
            splits on; -1 when unset
        :type col: int
        :param value: split value the partition is made against
        :param results: class counts for this branch; non-None only in
            leaves, where the decision is made
        :type results: dict
        :param tb: subtree followed when the comparison against value
            holds (the "true" branch)
        :type tb: DecisionNode
        :param fb: subtree followed when the comparison against value
            fails (the "false" branch)
        :type fb: DecisionNode
        """
        self.results = results
        self.col = col
        self.value = value
        self.fb = fb
        self.tb = tb
def compare_numerical(row, column, value):
    """Test a row's numeric attribute against a split threshold.

    :param row: particular row in the set
    :type row: list
    :param column: index of the column (attribute) of the train set
    :type column: int
    :param value: the split value the partition in the tree is made on
    :type value: int or float
    :return: True if the row's value is >= value, else False
    :rtype: bool
    """
    attribute = row[column]
    return attribute >= value
def compare_nominal(row, column, value):
    """Test a row's nominal attribute for equality with a split value.

    :param row: particular row in the set
    :type row: list
    :param column: index of the column (attribute) of the train set
    :type column: int
    :param value: the split value the partition in the tree is made on
    :type value: str
    :return: True if the row's value equals value, else False
    :rtype: bool
    """
    attribute = row[column]
    return attribute == value
def divide_set(rows, column, value):
    """Divide a set on a specific column. Handles numeric and
    nominal values.

    :param rows: the train set
    :type rows: list(list)
    :param column: index of the column (attribute) of the train set
    :type column: int
    :param value: the split value the partition for this branch is made on
    :type value: int or float or string
    :return: (rows matching the split, rows not matching it)
    :rtype: list, list
    """
    # Numeric values are split by threshold (>=), anything else (strings)
    # by equality.  Note: bool is a subclass of int, so True/False would be
    # treated as numeric here, same as the original behavior.
    if isinstance(value, (int, float)):
        split_function = compare_numerical
    else:
        split_function = compare_nominal
    # Single pass over the data; the original ran two list comprehensions
    # over the same rows, evaluating the predicate twice per row.
    set1, set2 = [], []
    for row in rows:
        if split_function(row, column, value):
            set1.append(row)
        else:
            set2.append(row)
    return set1, set2
def build_tree(rows, scoref=entropy):
    """Recursively build a decision tree (CART-style) over *rows*.

    At each node, every distinct value of every attribute column is tried
    as a split; the split with the highest information gain (per *scoref*)
    becomes the node. When no split improves the score, a leaf holding the
    class counts is returned.

    :param rows: training set; last column of each row is the class label
    :type rows: list(list)
    :param scoref: impurity function (entropy or gini_impurity)
    :return: root of the (sub)tree
    :rtype: DecisionNode
    """
    if not rows:
        return DecisionNode()
    current_score = scoref(rows)

    # Track the best split criteria found so far.
    best_gain = 0.0
    best_criteria = None
    best_sets = None

    column_count = len(rows[0]) - 1  # last column is the label, not an attribute
    for col in range(column_count):
        # Distinct values in this column. dict.fromkeys preserves first-seen
        # order, so tie-breaking between equal-gain splits matches the
        # original dict-as-set scan order.
        column_values = dict.fromkeys(row[col] for row in rows)
        for value in column_values:
            set1, set2 = divide_set(rows, col, value)
            # Weighted information gain of this candidate split.
            p = float(len(set1)) / len(rows)
            gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
            if gain > best_gain and set1 and set2:
                best_gain = gain
                best_criteria = (col, value)
                best_sets = (set1, set2)

    # Create the sub-branches, or a leaf when nothing improves the score.
    if best_gain > 0:
        true_branch = build_tree(best_sets[0], scoref)
        false_branch = build_tree(best_sets[1], scoref)
        return DecisionNode(col=best_criteria[0], value=best_criteria[1],
                            tb=true_branch, fb=false_branch)
    return DecisionNode(results=unique_counts(rows))
def print_tree(tree, indent='', level=0):
    """Recursively print *tree*: class counts at leaves, otherwise the
    split criterion followed by the T/F branches, indented per depth."""
    if tree.results:
        # Leaf node: show the class counts dict.
        print(tree.results)
    else:
        # Internal node: print the split criterion and depth.
        print(f'{tree.col}:{tree.value}? Level= ({level})')
        # Then recurse into both branches.
        print(f'{indent}T-> ', end='')
        print_tree(tree.tb, indent + ' ', level + 1)
        print(f'{indent}F-> ', end='')
        print_tree(tree.fb, indent + ' ', level + 1)
def classify(observation, tree):
    """Route *observation* down *tree* and return the leaf's class counts.

    :param observation: a row of attribute values (indexed by node.col)
    :type observation: list
    :param tree: root of a tree built by build_tree
    :type tree: DecisionNode
    :return: class-count dict of the leaf the observation lands in
    :rtype: dict
    """
    # Iterative walk instead of the original recursion: same routing,
    # no recursion-depth limit on deep trees.
    node = tree
    while not node.results:
        value = observation[node.col]
        if isinstance(value, (int, float)):
            compare = compare_numerical
        else:
            compare = compare_nominal
        node = node.tb if compare(observation, node.col, node.value) else node.fb
    return node.results
# Training set: each row is four numeric attribute values followed by the
# class label (the last column, as consumed by unique_counts/build_tree).
# NOTE(review): values and labels ('I. setosa', 'I. versicolor',
# 'I. virginica') look like a subset of Fisher's Iris dataset, with the
# four columns presumably sepal length/width and petal length/width —
# confirm against the original data source.
trainingData = [
    [6.3, 2.9, 5.6, 1.8, 'I. virginica'],
    [6.5, 3.0, 5.8, 2.2, 'I. virginica'],
    [7.6, 3.0, 6.6, 2.1, 'I. virginica'],
    [4.9, 2.5, 4.5, 1.7, 'I. virginica'],
    [7.3, 2.9, 6.3, 1.8, 'I. virginica'],
    [6.7, 2.5, 5.8, 1.8, 'I. virginica'],
    [7.2, 3.6, 6.1, 2.5, 'I. virginica'],
    [6.5, 3.2, 5.1, 2.0, 'I. virginica'],
    [6.4, 2.7, 5.3, 1.9, 'I. virginica'],
    [6.8, 3.0, 5.5, 2.1, 'I. virginica'],
    [5.7, 2.5, 5.0, 2.0, 'I. virginica'],
    [5.8, 2.8, 5.1, 2.4, 'I. virginica'],
    [6.4, 3.2, 5.3, 2.3, 'I. virginica'],
    [6.5, 3.0, 5.5, 1.8, 'I. virginica'],
    [7.7, 3.8, 6.7, 2.2, 'I. virginica'],
    [7.7, 2.6, 6.9, 2.3, 'I. virginica'],
    [6.0, 2.2, 5.0, 1.5, 'I. virginica'],
    [6.9, 3.2, 5.7, 2.3, 'I. virginica'],
    [5.6, 2.8, 4.9, 2.0, 'I. virginica'],
    [7.7, 2.8, 6.7, 2.0, 'I. virginica'],
    [6.3, 2.7, 4.9, 1.8, 'I. virginica'],
    [6.7, 3.3, 5.7, 2.1, 'I. virginica'],
    [7.2, 3.2, 6.0, 1.8, 'I. virginica'],
    [6.2, 2.8, 4.8, 1.8, 'I. virginica'],
    [6.1, 3.0, 4.9, 1.8, 'I. virginica'],
    [6.4, 2.8, 5.6, 2.1, 'I. virginica'],
    [7.2, 3.0, 5.8, 1.6, 'I. virginica'],
    [7.4, 2.8, 6.1, 1.9, 'I. virginica'],
    [7.9, 3.8, 6.4, 2.0, 'I. virginica'],
    [6.4, 2.8, 5.6, 2.2, 'I. virginica'],
    [6.3, 2.8, 5.1, 1.5, 'I. virginica'],
    [6.1, 2.6, 5.6, 1.4, 'I. virginica'],
    [7.7, 3.0, 6.1, 2.3, 'I. virginica'],
    [6.3, 3.4, 5.6, 2.4, 'I. virginica'],
    [5.1, 3.5, 1.4, 0.2, 'I. setosa'],
    [4.9, 3.0, 1.4, 0.2, 'I. setosa'],
    [4.7, 3.2, 1.3, 0.2, 'I. setosa'],
    [4.6, 3.1, 1.5, 0.2, 'I. setosa'],
    [5.0, 3.6, 1.4, 0.2, 'I. setosa'],
    [5.4, 3.9, 1.7, 0.4, 'I. setosa'],
    [4.6, 3.4, 1.4, 0.3, 'I. setosa'],
    [5.0, 3.4, 1.5, 0.2, 'I. setosa'],
    [4.4, 2.9, 1.4, 0.2, 'I. setosa'],
    [4.9, 3.1, 1.5, 0.1, 'I. setosa'],
    [5.4, 3.7, 1.5, 0.2, 'I. setosa'],
    [4.8, 3.4, 1.6, 0.2, 'I. setosa'],
    [4.8, 3.0, 1.4, 0.1, 'I. setosa'],
    [4.3, 3.0, 1.1, 0.1, 'I. setosa'],
    [5.8, 4.0, 1.2, 0.2, 'I. setosa'],
    [5.7, 4.4, 1.5, 0.4, 'I. setosa'],
    [5.4, 3.9, 1.3, 0.4, 'I. setosa'],
    [5.1, 3.5, 1.4, 0.3, 'I. setosa'],
    [5.7, 3.8, 1.7, 0.3, 'I. setosa'],
    [5.1, 3.8, 1.5, 0.3, 'I. setosa'],
    [5.4, 3.4, 1.7, 0.2, 'I. setosa'],
    [5.1, 3.7, 1.5, 0.4, 'I. setosa'],
    [4.6, 3.6, 1.0, 0.2, 'I. setosa'],
    [5.1, 3.3, 1.7, 0.5, 'I. setosa'],
    [4.8, 3.4, 1.9, 0.2, 'I. setosa'],
    [5.0, 3.0, 1.6, 0.2, 'I. setosa'],
    [5.0, 3.4, 1.6, 0.4, 'I. setosa'],
    [5.2, 3.5, 1.5, 0.2, 'I. setosa'],
    [5.2, 3.4, 1.4, 0.2, 'I. setosa'],
    [5.5, 2.3, 4.0, 1.3, 'I. versicolor'],
    [6.5, 2.8, 4.6, 1.5, 'I. versicolor'],
    [5.7, 2.8, 4.5, 1.3, 'I. versicolor'],
    [6.3, 3.3, 4.7, 1.6, 'I. versicolor'],
    [4.9, 2.4, 3.3, 1.0, 'I. versicolor'],
    [6.6, 2.9, 4.6, 1.3, 'I. versicolor'],
    [5.2, 2.7, 3.9, 1.4, 'I. versicolor'],
    [5.0, 2.0, 3.5, 1.0, 'I. versicolor'],
    [5.9, 3.0, 4.2, 1.5, 'I. versicolor'],
    [6.0, 2.2, 4.0, 1.0, 'I. versicolor'],
    [6.1, 2.9, 4.7, 1.4, 'I. versicolor'],
    [5.6, 2.9, 3.6, 1.3, 'I. versicolor'],
    [6.7, 3.1, 4.4, 1.4, 'I. versicolor'],
    [5.6, 3.0, 4.5, 1.5, 'I. versicolor'],
    [5.8, 2.7, 4.1, 1.0, 'I. versicolor'],
    [6.2, 2.2, 4.5, 1.5, 'I. versicolor'],
    [5.6, 2.5, 3.9, 1.1, 'I. versicolor'],
    [5.9, 3.2, 4.8, 1.8, 'I. versicolor'],
    [6.1, 2.8, 4.0, 1.3, 'I. versicolor'],
    [6.3, 2.5, 4.9, 1.5, 'I. versicolor'],
    [6.1, 2.8, 4.7, 1.2, 'I. versicolor'],
    [6.4, 2.9, 4.3, 1.3, 'I. versicolor'],
    [6.6, 3.0, 4.4, 1.4, 'I. versicolor'],
    [6.8, 2.8, 4.8, 1.4, 'I. versicolor'],
    [6.7, 3.0, 5.0, 1.7, 'I. versicolor'],
    [6.0, 2.9, 4.5, 1.5, 'I. versicolor'],
    [5.7, 2.6, 3.5, 1.0, 'I. versicolor'],
    [5.5, 2.4, 3.8, 1.1, 'I. versicolor'],
    [5.5, 2.4, 3.7, 1.0, 'I. versicolor'],
    [5.8, 2.7, 3.9, 1.2, 'I. versicolor'],
    [6.0, 2.7, 5.1, 1.6, 'I. versicolor'],
    [5.4, 3.0, 4.5, 1.5, 'I. versicolor'],
    [6.0, 3.4, 4.5, 1.6, 'I. versicolor'],
    [6.7, 3.1, 4.7, 1.5, 'I. versicolor'],
    [6.3, 2.3, 4.4, 1.3, 'I. versicolor'],
    [5.6, 3.0, 4.1, 1.3, 'I. versicolor'],
    [5.5, 2.5, 4.0, 1.3, 'I. versicolor'],
    [5.5, 2.6, 4.4, 1.2, 'I. versicolor'],
    [6.1, 3.0, 4.6, 1.4, 'I. versicolor'],
    [5.8, 2.6, 4.0, 1.2, 'I. versicolor'],
    [5.0, 2.3, 3.3, 1.0, 'I. versicolor'],
    [5.6, 2.7, 4.2, 1.3, 'I. versicolor'],
    [5.7, 3.0, 4.2, 1.2, 'I. versicolor'],
    [5.7, 2.9, 4.2, 1.3, 'I. versicolor'],
    [6.2, 2.9, 4.3, 1.3, 'I. versicolor'],
    [5.1, 2.5, 3.0, 1.1, 'I. versicolor'],
    [5.7, 2.8, 4.1, 1.3, 'I. versicolor'],
    [6.4, 3.1, 5.5, 1.8, 'I. virginica'],
    [6.0, 3.0, 4.8, 1.8, 'I. virginica'],
    [6.9, 3.1, 5.4, 2.1, 'I. virginica'],
    [6.7, 3.1, 5.6, 2.4, 'I. virginica'],
    [6.9, 3.1, 5.1, 2.3, 'I. virginica'],
    [5.8, 2.7, 5.1, 1.9, 'I. virginica'],
    [6.8, 3.2, 5.9, 2.3, 'I. virginica'],
    [6.7, 3.3, 5.7, 2.5, 'I. virginica'],
    [6.7, 3.0, 5.2, 2.3, 'I. virginica'],
    [6.3, 2.5, 5.0, 1.9, 'I. virginica'],
    [6.5, 3.0, 5.2, 2.0, 'I. virginica'],
    [6.2, 3.4, 5.4, 2.3, 'I. virginica'],
    [4.7, 3.2, 1.6, 0.2, 'I. setosa'],
    [4.8, 3.1, 1.6, 0.2, 'I. setosa'],
    [5.4, 3.4, 1.5, 0.4, 'I. setosa'],
    [5.2, 4.1, 1.5, 0.1, 'I. setosa'],
    [5.5, 4.2, 1.4, 0.2, 'I. setosa'],
    [4.9, 3.1, 1.5, 0.2, 'I. setosa'],
    [5.0, 3.2, 1.2, 0.2, 'I. setosa'],
    [5.5, 3.5, 1.3, 0.2, 'I. setosa'],
    [4.9, 3.6, 1.4, 0.1, 'I. setosa'],
    [4.4, 3.0, 1.3, 0.2, 'I. setosa'],
    [5.1, 3.4, 1.5, 0.2, 'I. setosa'],
    [5.0, 3.5, 1.3, 0.3, 'I. setosa'],
    [4.5, 2.3, 1.3, 0.3, 'I. setosa'],
    [4.4, 3.2, 1.3, 0.2, 'I. setosa'],
    [5.0, 3.5, 1.6, 0.6, 'I. setosa'],
    [5.1, 3.8, 1.9, 0.4, 'I. setosa'],
    [4.8, 3.0, 1.4, 0.3, 'I. setosa'],
    [5.1, 3.8, 1.6, 0.2, 'I. setosa'],
    [5.9, 3.0, 5.1, 1.8, 'I. virginica']
]
if __name__ == "__main__":
    # Read one test sample from stdin: four numeric attributes, one per
    # line, followed by its class label.
    attributes = [float(input()) for _ in range(4)]
    plant_type = input()
    test_case = attributes + [plant_type]

    # Train an independent tree on each half of the training data.
    midpoint = len(trainingData) // 2
    tree1 = build_tree(trainingData[:midpoint])
    tree2 = build_tree(trainingData[midpoint:])

    # Take the last class key from each leaf's count dict (mirrors the
    # original overwrite-in-a-loop extraction).
    for label in classify(test_case, tree1).keys():
        res1 = label
    for label in classify(test_case, tree2).keys():
        res2 = label

    print_tree(tree1)
    print_tree(tree2)

    # Report the agreed class, or flag the contradiction between the trees.
    if res1 == res2:
        print(res1)
    else:
        print("KONTRADIKCIJA")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement