ZivkicaI

zadaca jane

Nov 14th, 2018
263
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.19 KB | None | 0 0
  1. from __future__ import print_function
  2.  
# Labelled training rows used by the __main__ section to build the trees.
# Every row holds four numeric attribute values followed by a class-label
# string; the label is the last column, which is what uniquecounts() tallies.
# NOTE(review): the values look like Iris flower measurements - confirm the
# meaning and units of the four columns against the assignment text.
trainingData=[
[6.3,2.9,5.6,1.8,'I. virginica'],
[6.5,3.0,5.8,2.2,'I. virginica'],
[7.6,3.0,6.6,2.1,'I. virginica'],
[4.9,2.5,4.5,1.7,'I. virginica'],
[7.3,2.9,6.3,1.8,'I. virginica'],
[6.7,2.5,5.8,1.8,'I. virginica'],
[7.2,3.6,6.1,2.5,'I. virginica'],
[6.5,3.2,5.1,2.0,'I. virginica'],
[6.4,2.7,5.3,1.9,'I. virginica'],
[6.8,3.0,5.5,2.1,'I. virginica'],
[5.7,2.5,5.0,2.0,'I. virginica'],
[5.8,2.8,5.1,2.4,'I. virginica'],
[6.4,3.2,5.3,2.3,'I. virginica'],
[6.5,3.0,5.5,1.8,'I. virginica'],
[7.7,3.8,6.7,2.2,'I. virginica'],
[7.7,2.6,6.9,2.3,'I. virginica'],
[6.0,2.2,5.0,1.5,'I. virginica'],
[6.9,3.2,5.7,2.3,'I. virginica'],
[5.6,2.8,4.9,2.0,'I. virginica'],
[7.7,2.8,6.7,2.0,'I. virginica'],
[6.3,2.7,4.9,1.8,'I. virginica'],
[6.7,3.3,5.7,2.1,'I. virginica'],
[7.2,3.2,6.0,1.8,'I. virginica'],
[6.2,2.8,4.8,1.8,'I. virginica'],
[6.1,3.0,4.9,1.8,'I. virginica'],
[6.4,2.8,5.6,2.1,'I. virginica'],
[7.2,3.0,5.8,1.6,'I. virginica'],
[7.4,2.8,6.1,1.9,'I. virginica'],
[7.9,3.8,6.4,2.0,'I. virginica'],
[6.4,2.8,5.6,2.2,'I. virginica'],
[6.3,2.8,5.1,1.5,'I. virginica'],
[6.1,2.6,5.6,1.4,'I. virginica'],
[7.7,3.0,6.1,2.3,'I. virginica'],
[6.3,3.4,5.6,2.4,'I. virginica'],
[5.1,3.5,1.4,0.2,'I. setosa'],
[4.9,3.0,1.4,0.2,'I. setosa'],
[4.7,3.2,1.3,0.2,'I. setosa'],
[4.6,3.1,1.5,0.2,'I. setosa'],
[5.0,3.6,1.4,0.2,'I. setosa'],
[5.4,3.9,1.7,0.4,'I. setosa'],
[4.6,3.4,1.4,0.3,'I. setosa'],
[5.0,3.4,1.5,0.2,'I. setosa'],
[4.4,2.9,1.4,0.2,'I. setosa'],
[4.9,3.1,1.5,0.1,'I. setosa'],
[5.4,3.7,1.5,0.2,'I. setosa'],
[4.8,3.4,1.6,0.2,'I. setosa'],
[4.8,3.0,1.4,0.1,'I. setosa'],
[4.3,3.0,1.1,0.1,'I. setosa'],
[5.8,4.0,1.2,0.2,'I. setosa'],
[5.7,4.4,1.5,0.4,'I. setosa'],
[5.4,3.9,1.3,0.4,'I. setosa'],
[5.1,3.5,1.4,0.3,'I. setosa'],
[5.7,3.8,1.7,0.3,'I. setosa'],
[5.1,3.8,1.5,0.3,'I. setosa'],
[5.4,3.4,1.7,0.2,'I. setosa'],
[5.1,3.7,1.5,0.4,'I. setosa'],
[4.6,3.6,1.0,0.2,'I. setosa'],
[5.1,3.3,1.7,0.5,'I. setosa'],
[4.8,3.4,1.9,0.2,'I. setosa'],
[5.0,3.0,1.6,0.2,'I. setosa'],
[5.0,3.4,1.6,0.4,'I. setosa'],
[5.2,3.5,1.5,0.2,'I. setosa'],
[5.2,3.4,1.4,0.2,'I. setosa'],
[5.5,2.3,4.0,1.3,'I. versicolor'],
[6.5,2.8,4.6,1.5,'I. versicolor'],
[5.7,2.8,4.5,1.3,'I. versicolor'],
[6.3,3.3,4.7,1.6,'I. versicolor'],
[4.9,2.4,3.3,1.0,'I. versicolor'],
[6.6,2.9,4.6,1.3,'I. versicolor'],
[5.2,2.7,3.9,1.4,'I. versicolor'],
[5.0,2.0,3.5,1.0,'I. versicolor'],
[5.9,3.0,4.2,1.5,'I. versicolor'],
[6.0,2.2,4.0,1.0,'I. versicolor'],
[6.1,2.9,4.7,1.4,'I. versicolor'],
[5.6,2.9,3.6,1.3,'I. versicolor'],
[6.7,3.1,4.4,1.4,'I. versicolor'],
[5.6,3.0,4.5,1.5,'I. versicolor'],
[5.8,2.7,4.1,1.0,'I. versicolor'],
[6.2,2.2,4.5,1.5,'I. versicolor'],
[5.6,2.5,3.9,1.1,'I. versicolor'],
[5.9,3.2,4.8,1.8,'I. versicolor'],
[6.1,2.8,4.0,1.3,'I. versicolor'],
[6.3,2.5,4.9,1.5,'I. versicolor'],
[6.1,2.8,4.7,1.2,'I. versicolor'],
[6.4,2.9,4.3,1.3,'I. versicolor'],
[6.6,3.0,4.4,1.4,'I. versicolor'],
[6.8,2.8,4.8,1.4,'I. versicolor'],
[6.7,3.0,5.0,1.7,'I. versicolor'],
[6.0,2.9,4.5,1.5,'I. versicolor'],
[5.7,2.6,3.5,1.0,'I. versicolor'],
[5.5,2.4,3.8,1.1,'I. versicolor'],
[5.5,2.4,3.7,1.0,'I. versicolor'],
[5.8,2.7,3.9,1.2,'I. versicolor'],
[6.0,2.7,5.1,1.6,'I. versicolor'],
[5.4,3.0,4.5,1.5,'I. versicolor'],
[6.0,3.4,4.5,1.6,'I. versicolor'],
[6.7,3.1,4.7,1.5,'I. versicolor'],
[6.3,2.3,4.4,1.3,'I. versicolor'],
[5.6,3.0,4.1,1.3,'I. versicolor'],
[5.5,2.5,4.0,1.3,'I. versicolor'],
[5.5,2.6,4.4,1.2,'I. versicolor'],
[6.1,3.0,4.6,1.4,'I. versicolor'],
[5.8,2.6,4.0,1.2,'I. versicolor'],
[5.0,2.3,3.3,1.0,'I. versicolor'],
[5.6,2.7,4.2,1.3,'I. versicolor'],
[5.7,3.0,4.2,1.2,'I. versicolor'],
[5.7,2.9,4.2,1.3,'I. versicolor'],
[6.2,2.9,4.3,1.3,'I. versicolor'],
[5.1,2.5,3.0,1.1,'I. versicolor'],
[5.7,2.8,4.1,1.3,'I. versicolor'],
[6.4,3.1,5.5,1.8,'I. virginica'],
[6.0,3.0,4.8,1.8,'I. virginica'],
[6.9,3.1,5.4,2.1,'I. virginica'],
[6.7,3.1,5.6,2.4,'I. virginica'],
[6.9,3.1,5.1,2.3,'I. virginica'],
[5.8,2.7,5.1,1.9,'I. virginica'],
[6.8,3.2,5.9,2.3,'I. virginica'],
[6.7,3.3,5.7,2.5,'I. virginica'],
[6.7,3.0,5.2,2.3,'I. virginica'],
[6.3,2.5,5.0,1.9,'I. virginica'],
[6.5,3.0,5.2,2.0,'I. virginica'],
[6.2,3.4,5.4,2.3,'I. virginica'],
[4.7,3.2,1.6,0.2,'I. setosa'],
[4.8,3.1,1.6,0.2,'I. setosa'],
[5.4,3.4,1.5,0.4,'I. setosa'],
[5.2,4.1,1.5,0.1,'I. setosa'],
[5.5,4.2,1.4,0.2,'I. setosa'],
[4.9,3.1,1.5,0.2,'I. setosa'],
[5.0,3.2,1.2,0.2,'I. setosa'],
[5.5,3.5,1.3,0.2,'I. setosa'],
[4.9,3.6,1.4,0.1,'I. setosa'],
[4.4,3.0,1.3,0.2,'I. setosa'],
[5.1,3.4,1.5,0.2,'I. setosa'],
[5.0,3.5,1.3,0.3,'I. setosa'],
[4.5,2.3,1.3,0.3,'I. setosa'],
[4.4,3.2,1.3,0.2,'I. setosa'],
[5.0,3.5,1.6,0.6,'I. setosa'],
[5.1,3.8,1.9,0.4,'I. setosa'],
[4.8,3.0,1.4,0.3,'I. setosa'],
[5.1,3.8,1.6,0.2,'I. setosa'],
[5.9,3.0,5.1,1.8,'I. virginica']
]
  146.  
  147.  
  148. # my_data=[line.split('\t') for line in file('decision_tree_example.txt')]
  149.  
  150. class decisionnode:
  151.     def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
  152.         self.col = col
  153.         self.value = value
  154.         self.results = results
  155.         self.tb = tb
  156.         self.fb = fb
  157.  
  158.  
  159. def sporedi_broj(row, column, value):
  160.     return row[column] >= value
  161.  
  162.  
  163. def sporedi_string(row, column, value):
  164.     return row[column] == value
  165.  
  166.  
  167. # Divides a set on a specific column. Can handle numeric
  168. # or nominal values
  169. def divideset(rows, column, value):
  170.     # Make a function that tells us if a row is in
  171.     # the first group (true) or the second group (false)
  172.     split_function = None
  173.     if isinstance(value, int) or isinstance(value, float):  # ako vrednosta so koja sporeduvame e od tip int ili float
  174.         # split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  175.         split_function = sporedi_broj
  176.     else:
  177.         # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  178.         split_function = sporedi_string
  179.  
  180.     # Divide the rows into two sets and return them
  181.     set_false = []
  182.     set_true = []
  183.     for row in rows:
  184.         if split_function(row, column, value):
  185.             set_true.append(row)
  186.         else:
  187.             set_false.append(row)
  188.     set1 = [row for row in rows if
  189.             split_function(row, column, value)]  # za sekoj row od rows za koj split_function vrakja true
  190.     set2 = [row for row in rows if
  191.             not split_function(row, column, value)]  # za sekoj row od rows za koj split_function vrakja false
  192.     # return (set1, set2)
  193.     return (set_true, set_false)
  194.  
  195.  
  196. #st, sf = divideset(my_data, 3, 20)
  197. #print(sf)
  198. #print(st)
  199.  
  200.  
  201. # Create counts of possible results (the last column of
  202. # each row is the result)
  203. def uniquecounts(rows):
  204.     results = {}
  205.     for row in rows:
  206.         # The result is the last column
  207.         r = row[-1]
  208.         results.setdefault(r, 0)
  209.         results[r] += 1
  210.  
  211.     return results
  212.  
  213.  
  214. #print(uniquecounts(my_data))
  215. #print(uniquecounts(st))
  216. #print(uniquecounts(sf))
  217.  
  218.  
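# NOTE(review): the description below does not match log2(), the function that
# follows it; it presumably belonged to an impurity measure that is no longer
# part of this file.
# Probability that a randomly placed item will
# be in the wrong category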
  221.  
  222. def log2(x):
  223.     from math import log
  224.     l2 = log(x) / log(2)
  225.     return l2
  226.  
  227.  
  228. # Entropy is the sum of p(x)log(p(x)) across all
  229. # the different possible results
  230. def entropy(rows):
  231.     results = uniquecounts(rows)
  232.     # Now calculate the entropy
  233.     ent = 0.0
  234.     for r in results.keys():
  235.         p = float(results[r]) / len(rows)
  236.         ent = ent - p * log2(p)
  237.     return ent
  238.  
  239.  
  240. #print(entropy(my_data), entropy(st), entropy(sf))
  241.  
  242.  
  243. # exit(0)
  244.  
  245.  
  246. def buildtree(rows, scoref=entropy):
  247.     if len(rows) == 0: return decisionnode()
  248.     current_score = scoref(rows)
  249.  
  250.     # Set up some variables to track the best criteria
  251.     best_gain = 0.0
  252.     best_column = -1
  253.     best_value = None
  254.     best_subsetf = None
  255.     best_subsett = None
  256.  
  257.     column_count = len(rows[0]) - 1
  258.     for col in range(column_count):
  259.         # Generate the list of different values in
  260.         # this column
  261.         column_values = set()
  262.         for row in rows:
  263.             column_values.add(row[col])
  264.         # Now try dividing the rows up for each value
  265.         # in this column
  266.         for value in column_values:
  267.             (set1, set2) = divideset(rows, col, value)
  268.  
  269.             # Information gain
  270.             p = float(len(set1)) / len(rows)
  271.             gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
  272.             if gain > best_gain and len(set1) > 0 and len(set2) > 0:
  273.                 best_gain = gain
  274.                 best_column = col
  275.                 best_value = value
  276.                 best_subsett = set1
  277.                 best_subsetf = set2
  278.                 # best_criteria = (col, value)
  279.                 # best_sets = (set1, set2)
  280.  
  281.     # Create the subbranches
  282.     if best_gain > 0:
  283.         trueBranch = buildtree(best_subsett, scoref)
  284.         falseBranch = buildtree(best_subsetf, scoref)
  285.         return decisionnode(col=best_column, value=best_value,
  286.                             tb=trueBranch, fb=falseBranch)
  287.     else:
  288.         return decisionnode(results=uniquecounts(rows))
  289.  
  290.  
  291. #t = buildtree(my_data)
  292.  
  293.  
  294. def printtree(tree, level, indent=''):
  295.     # Is this a leaf node?
  296.     if tree.results != None:
  297.         temp = sorted(tree.results.items())
  298.         print(' ' + '{\'' + str(temp[0][0])+ '\': ' + str(temp[0][1])+'}')
  299.  
  300.     else:
  301.         # Print the criteria
  302.         if level != 0:
  303.             print(' ' + str(tree.col) + ":" + str(tree.value) + '? ' + 'Level= ' + '(' +str(level)+')')
  304.         else:
  305.             print(str(tree.col) + ":" + str(tree.value) + '? ' + 'Level= ' + '(' +str(level)+')')
  306.         # Print the branches
  307.         print(indent + 'T->', end='')
  308.         printtree(tree.tb, level+1,indent + '  ')
  309.         print(indent + 'F->', end='')
  310.         printtree(tree.fb, level+1,indent + '  ')
  311.  
  312.  
  313. #printtree(t)
  314.  
  315. # exit(0)
  316.  
  317. def classify(observation, tree):
  318.     if tree.results != None:
  319.         return tree.results
  320.     else:
  321.         vrednost = observation[tree.col]
  322.         branch = None
  323.  
  324.         if isinstance(vrednost, int) or isinstance(vrednost, float):
  325.             if vrednost >= tree.value:
  326.                 branch = tree.tb
  327.             else:
  328.                 branch = tree.fb
  329.         else:
  330.             if vrednost == tree.value:
  331.                 branch = tree.tb
  332.             else:
  333.                 branch = tree.fb
  334.  
  335.         return classify(observation, branch)
  336.  
  337.  
  338. #print(classify(['google', 'MK', 'no', 19, 'Unknown'], t))
  339. # for test_case in test_cases:
  340. #     print("Nepoznat slucaj:", test_case, " Klasifikacija: ", classify(test_case, t))
  341.  
  342.  
  343. if __name__ == "__main__":
  344.    
  345.  
  346.     att1=input()
  347.     att2=input()
  348.     att3=input()
  349.     att4=input()
  350.     planttype=input()
  351.     testCase=[att1,att2,att3,att4,planttype]
  352.     trainingData1 = trainingData[:len(trainingData)/2]
  353.     trainingData2 = trainingData[len(trainingData)/2+1:]
  354. #    for a in trainingData1:
  355. #       print(a)
  356.     tree1 = buildtree(trainingData1)
  357.     tree2 = buildtree(trainingData2)
  358.     printtree(tree1,0)
  359.     printtree(tree2,0)
  360.     t1 = classify(testCase,tree1)
  361.     t2 = classify(testCase,tree2)
  362.     key_t1 = t1.keys()
  363.     key_t2 = t2.keys()
  364.     if key_t1 == key_t2:
  365.         print(key_t1[0])
  366.     else:
  367.         print("KONTRADIKCIJA")
Advertisement
Add Comment
Please, Sign In to add comment