Advertisement
cimona

DrvaNaOdluka

Sep 13th, 2018
330
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.52 KB | None | 0 0
# Iris training set: each row is
# [sepal length, sepal width, petal length, petal width, species label].
# Measurements are in centimetres; the label (last column) is the class
# that the decision tree learns to predict.
trainingData=[
[6.3,2.9,5.6,1.8,'I. virginica'],
[6.5,3.0,5.8,2.2,'I. virginica'],
[7.6,3.0,6.6,2.1,'I. virginica'],
[4.9,2.5,4.5,1.7,'I. virginica'],
[7.3,2.9,6.3,1.8,'I. virginica'],
[6.7,2.5,5.8,1.8,'I. virginica'],
[7.2,3.6,6.1,2.5,'I. virginica'],
[6.5,3.2,5.1,2.0,'I. virginica'],
[6.4,2.7,5.3,1.9,'I. virginica'],
[6.8,3.0,5.5,2.1,'I. virginica'],
[5.7,2.5,5.0,2.0,'I. virginica'],
[5.8,2.8,5.1,2.4,'I. virginica'],
[6.4,3.2,5.3,2.3,'I. virginica'],
[6.5,3.0,5.5,1.8,'I. virginica'],
[7.7,3.8,6.7,2.2,'I. virginica'],
[7.7,2.6,6.9,2.3,'I. virginica'],
[6.0,2.2,5.0,1.5,'I. virginica'],
[6.9,3.2,5.7,2.3,'I. virginica'],
[5.6,2.8,4.9,2.0,'I. virginica'],
[7.7,2.8,6.7,2.0,'I. virginica'],
[6.3,2.7,4.9,1.8,'I. virginica'],
[6.7,3.3,5.7,2.1,'I. virginica'],
[7.2,3.2,6.0,1.8,'I. virginica'],
[6.2,2.8,4.8,1.8,'I. virginica'],
[6.1,3.0,4.9,1.8,'I. virginica'],
[6.4,2.8,5.6,2.1,'I. virginica'],
[7.2,3.0,5.8,1.6,'I. virginica'],
[7.4,2.8,6.1,1.9,'I. virginica'],
[7.9,3.8,6.4,2.0,'I. virginica'],
[6.4,2.8,5.6,2.2,'I. virginica'],
[6.3,2.8,5.1,1.5,'I. virginica'],
[6.1,2.6,5.6,1.4,'I. virginica'],
[7.7,3.0,6.1,2.3,'I. virginica'],
[6.3,3.4,5.6,2.4,'I. virginica'],
[5.1,3.5,1.4,0.2,'I. setosa'],
[4.9,3.0,1.4,0.2,'I. setosa'],
[4.7,3.2,1.3,0.2,'I. setosa'],
[4.6,3.1,1.5,0.2,'I. setosa'],
[5.0,3.6,1.4,0.2,'I. setosa'],
[5.4,3.9,1.7,0.4,'I. setosa'],
[4.6,3.4,1.4,0.3,'I. setosa'],
[5.0,3.4,1.5,0.2,'I. setosa'],
[4.4,2.9,1.4,0.2,'I. setosa'],
[4.9,3.1,1.5,0.1,'I. setosa'],
[5.4,3.7,1.5,0.2,'I. setosa'],
[4.8,3.4,1.6,0.2,'I. setosa'],
[4.8,3.0,1.4,0.1,'I. setosa'],
[4.3,3.0,1.1,0.1,'I. setosa'],
[5.8,4.0,1.2,0.2,'I. setosa'],
[5.7,4.4,1.5,0.4,'I. setosa'],
[5.4,3.9,1.3,0.4,'I. setosa'],
[5.1,3.5,1.4,0.3,'I. setosa'],
[5.7,3.8,1.7,0.3,'I. setosa'],
[5.1,3.8,1.5,0.3,'I. setosa'],
[5.4,3.4,1.7,0.2,'I. setosa'],
[5.1,3.7,1.5,0.4,'I. setosa'],
[4.6,3.6,1.0,0.2,'I. setosa'],
[5.1,3.3,1.7,0.5,'I. setosa'],
[4.8,3.4,1.9,0.2,'I. setosa'],
[5.0,3.0,1.6,0.2,'I. setosa'],
[5.0,3.4,1.6,0.4,'I. setosa'],
[5.2,3.5,1.5,0.2,'I. setosa'],
[5.2,3.4,1.4,0.2,'I. setosa'],
[5.5,2.3,4.0,1.3,'I. versicolor'],
[6.5,2.8,4.6,1.5,'I. versicolor'],
[5.7,2.8,4.5,1.3,'I. versicolor'],
[6.3,3.3,4.7,1.6,'I. versicolor'],
[4.9,2.4,3.3,1.0,'I. versicolor'],
[6.6,2.9,4.6,1.3,'I. versicolor'],
[5.2,2.7,3.9,1.4,'I. versicolor'],
[5.0,2.0,3.5,1.0,'I. versicolor'],
[5.9,3.0,4.2,1.5,'I. versicolor'],
[6.0,2.2,4.0,1.0,'I. versicolor'],
[6.1,2.9,4.7,1.4,'I. versicolor'],
[5.6,2.9,3.6,1.3,'I. versicolor'],
[6.7,3.1,4.4,1.4,'I. versicolor'],
[5.6,3.0,4.5,1.5,'I. versicolor'],
[5.8,2.7,4.1,1.0,'I. versicolor'],
[6.2,2.2,4.5,1.5,'I. versicolor'],
[5.6,2.5,3.9,1.1,'I. versicolor'],
[5.9,3.2,4.8,1.8,'I. versicolor'],
[6.1,2.8,4.0,1.3,'I. versicolor'],
[6.3,2.5,4.9,1.5,'I. versicolor'],
[6.1,2.8,4.7,1.2,'I. versicolor'],
[6.4,2.9,4.3,1.3,'I. versicolor'],
[6.6,3.0,4.4,1.4,'I. versicolor'],
[6.8,2.8,4.8,1.4,'I. versicolor'],
[6.7,3.0,5.0,1.7,'I. versicolor'],
[6.0,2.9,4.5,1.5,'I. versicolor'],
[5.7,2.6,3.5,1.0,'I. versicolor'],
[5.5,2.4,3.8,1.1,'I. versicolor'],
[5.5,2.4,3.7,1.0,'I. versicolor'],
[5.8,2.7,3.9,1.2,'I. versicolor'],
[6.0,2.7,5.1,1.6,'I. versicolor'],
[5.4,3.0,4.5,1.5,'I. versicolor'],
[6.0,3.4,4.5,1.6,'I. versicolor'],
[6.7,3.1,4.7,1.5,'I. versicolor'],
[6.3,2.3,4.4,1.3,'I. versicolor'],
[5.6,3.0,4.1,1.3,'I. versicolor'],
[5.5,2.5,4.0,1.3,'I. versicolor'],
[5.5,2.6,4.4,1.2,'I. versicolor'],
[6.1,3.0,4.6,1.4,'I. versicolor'],
[5.8,2.6,4.0,1.2,'I. versicolor'],
[5.0,2.3,3.3,1.0,'I. versicolor'],
[5.6,2.7,4.2,1.3,'I. versicolor'],
[5.7,3.0,4.2,1.2,'I. versicolor'],
[5.7,2.9,4.2,1.3,'I. versicolor'],
[6.2,2.9,4.3,1.3,'I. versicolor'],
[5.1,2.5,3.0,1.1,'I. versicolor'],
[5.7,2.8,4.1,1.3,'I. versicolor'],
[6.4,3.1,5.5,1.8,'I. virginica'],
[6.0,3.0,4.8,1.8,'I. virginica'],
[6.9,3.1,5.4,2.1,'I. virginica'],
[6.7,3.1,5.6,2.4,'I. virginica'],
[6.9,3.1,5.1,2.3,'I. virginica'],
[5.8,2.7,5.1,1.9,'I. virginica'],
[6.8,3.2,5.9,2.3,'I. virginica'],
[6.7,3.3,5.7,2.5,'I. virginica'],
[6.7,3.0,5.2,2.3,'I. virginica'],
[6.3,2.5,5.0,1.9,'I. virginica'],
[6.5,3.0,5.2,2.0,'I. virginica'],
[6.2,3.4,5.4,2.3,'I. virginica'],
[4.7,3.2,1.6,0.2,'I. setosa'],
[4.8,3.1,1.6,0.2,'I. setosa'],
[5.4,3.4,1.5,0.4,'I. setosa'],
[5.2,4.1,1.5,0.1,'I. setosa'],
[5.5,4.2,1.4,0.2,'I. setosa'],
[4.9,3.1,1.5,0.2,'I. setosa'],
[5.0,3.2,1.2,0.2,'I. setosa'],
[5.5,3.5,1.3,0.2,'I. setosa'],
[4.9,3.6,1.4,0.1,'I. setosa'],
[4.4,3.0,1.3,0.2,'I. setosa'],
[5.1,3.4,1.5,0.2,'I. setosa'],
[5.0,3.5,1.3,0.3,'I. setosa'],
[4.5,2.3,1.3,0.3,'I. setosa'],
[4.4,3.2,1.3,0.2,'I. setosa'],
[5.0,3.5,1.6,0.6,'I. setosa'],
[5.1,3.8,1.9,0.4,'I. setosa'],
[4.8,3.0,1.4,0.3,'I. setosa'],
[5.1,3.8,1.6,0.2,'I. setosa'],
[5.9,3.0,5.1,1.8,'I. virginica']
]
  144.  
  145. # my_data=[line.split('\t') for line in file('decision_tree_example.txt')]
  146.  
  147. class decisionnode:
  148. def __init__(self,col=-1,value=None,results=None,tb=None,fb=None):
  149. self.col=col
  150. self.value=value
  151. self.results=results
  152. self.tb=tb
  153. self.fb=fb
  154.  
  155. def sporedi_broj(row,column,value):
  156. return row[column]>=value
  157.  
  158. def sporedi_string(row,column,value):
  159. return row[column]==value
  160.  
  161. # Divides a set on a specific column. Can handle numeric
  162. # or nominal values
  163. def divideset(rows,column,value):
  164. # Make a function that tells us if a row is in
  165. # the first group (true) or the second group (false)
  166. split_function=None
  167. if isinstance(value,int) or isinstance(value,float): # ako vrednosta so koja sporeduvame e od tip int ili float
  168. #split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  169. split_function=sporedi_broj
  170. else:
  171. # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  172. split_function=sporedi_string
  173.  
  174. # Divide the rows into two sets and return them
  175. # set1=[row for row in rows if split_function(row)] # za sekoj row od rows za koj split_function vrakja true
  176. # set2=[row for row in rows if not split_function(row)] # za sekoj row od rows za koj split_function vrakja false
  177. set1=[row for row in rows if split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja true
  178. set2=[row for row in rows if not split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja false
  179. return (set1,set2)
  180.  
  181. # Create counts of possible results (the last column of
  182. # each row is the result)
  183. def uniquecounts(rows):
  184. results={}
  185. for row in rows:
  186. # The result is the last column
  187. r=row[len(row)-1]
  188. if r not in results: results[r]=0
  189. results[r]+=1
  190. return results
  191.  
  192. # Probability that a randomly placed item will
  193. # be in the wrong category
  194. def giniimpurity(rows):
  195. total=len(rows)
  196. counts=uniquecounts(rows)
  197. imp=0
  198. for k1 in counts:
  199. p1=float(counts[k1])/total
  200. for k2 in counts:
  201. if k1==k2: continue
  202. p2=float(counts[k2])/total
  203. imp+=p1*p2
  204. return imp
  205.  
  206.  
  207. # Entropy is the sum of p(x)log(p(x)) across all
  208. # the different possible results
  209. def entropy(rows):
  210. from math import log
  211. log2=lambda x:log(x)/log(2)
  212. results=uniquecounts(rows)
  213. # Now calculate the entropy
  214. ent=0.0
  215. for r in results.keys():
  216. p=float(results[r])/len(rows)
  217. ent=ent-p*log2(p)
  218. return ent
  219.  
  220. def buildtree(rows,scoref=entropy):
  221. if len(rows)==0: return decisionnode()
  222. current_score=scoref(rows)
  223.  
  224. # Set up some variables to track the best criteria
  225. best_gain=0.0
  226. best_criteria=None
  227. best_sets=None
  228.  
  229. column_count=len(rows[0])-1
  230. for col in range(0,column_count):
  231. # Generate the list of different values in
  232. # this column
  233. column_values={}
  234. for row in rows:
  235. column_values[row[col]]=1
  236. #print
  237. # Now try dividing the rows up for each value
  238. # in this column
  239. for value in column_values.keys():
  240. (set1,set2)=divideset(rows,col,value)
  241.  
  242. # Information gain
  243. p=float(len(set1))/len(rows)
  244. gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  245. if gain>best_gain and len(set1)>0 and len(set2)>0:
  246. best_gain=gain
  247. best_criteria=(col,value)
  248. best_sets=(set1,set2)
  249.  
  250. # Create the subbranches
  251. if best_gain>0:
  252. trueBranch=buildtree(best_sets[0])
  253. falseBranch=buildtree(best_sets[1])
  254. return decisionnode(col=best_criteria[0],value=best_criteria[1],
  255. tb=trueBranch, fb=falseBranch)
  256. else:
  257. return decisionnode(results=uniquecounts(rows))
  258.  
  259. def printtree(tree,indent=''):
  260. # Is this a leaf node?
  261. if tree.results!=None:
  262. print (str(tree.results))
  263. else:
  264. # Print the criteria
  265. print (str(tree.col)+':'+str(tree.value)+'? ')
  266. # Print the branches
  267. print (indent+'T->',
  268. printtree(tree.tb,indent+' '))
  269. print (indent+'F->',
  270. printtree(tree.fb,indent+' '))
  271.  
  272.  
  273. def classify(observation,tree):
  274. if tree.results!=None:
  275. return tree.results
  276. else:
  277. vrednost=observation[tree.col]
  278. branch=None
  279.  
  280. if isinstance(vrednost,int) or isinstance(vrednost,float):
  281. if vrednost>=tree.value: branch=tree.tb
  282. else: branch=tree.fb
  283. else:
  284. if vrednost==tree.value: branch=tree.tb
  285. else: branch=tree.fb
  286.  
  287. return classify(observation,branch)
  288.  
  289.  
  290. def getFirstHalf(trainingData):
  291. return trainingData[:len(trainingData)/2]
  292.  
  293. def getSecondHalf(trainingData):
  294. return trainingData[len(trainingData)/2:]
  295.  
  296. if __name__ == "__main__":
  297.  
  298.  
  299. att1=input()
  300. att2=input()
  301. att3=input()
  302. att4=input()
  303. planttype=input()
  304. testCase=[att1,att2,att3,att4,planttype]
  305. s1 = getFirstHalf(trainingData)
  306. s2 = getSecondHalf(trainingData)
  307.  
  308. t3=buildtree(s1)
  309. t4=buildtree(s2)
  310. #printtree(t1)
  311. #printtree(t2)
  312.  
  313. p3 = classify(testCase,t3)
  314. p4 = classify(testCase,t4)
  315. #print(p1)
  316. #print(p2)
  317. k3 = p3.keys()
  318. k4 = p4.keys()
  319.  
  320. if(k3 != k4):
  321. print ("KONTRADIKCIJA")
  322. else:
  323. print (k3[0])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement