Advertisement
Guest User

Untitled

a guest
Sep 17th, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 10.53 KB | None | 0 0
  1. class decisionnode:
  2. def __init__(self,col=-1,value=None,results=None,tb=None,fb=None):
  3. self.col=col
  4. self.value=value
  5. self.results=results
  6. self.tb=tb
  7. self.fb=fb
  8.  
  9. def sporedi_broj(row,column,value):
  10. return row[column]>=value
  11.  
  12. def sporedi_string(row,column,value):
  13. return row[column]==value
  14.  
  15. # Divides a set on a specific column. Can handle numeric
  16. # or nominal values
  17. def divideset(rows,column,value):
  18. # Make a function that tells us if a row is in
  19. # the first group (true) or the second group (false)
  20. split_function=None
  21. if isinstance(value,int) or isinstance(value,float): # ako vrednosta so koja sporeduvame e od tip int ili float
  22. #split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  23. split_function=sporedi_broj
  24. else:
  25. # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  26. split_function=sporedi_string
  27.  
  28. # Divide the rows into two sets and return them
  29. # set1=[row for row in rows if split_function(row)] # za sekoj row od rows za koj split_function vrakja true
  30. # set2=[row for row in rows if not split_function(row)] # za sekoj row od rows za koj split_function vrakja false
  31. set1=[row for row in rows if split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja true
  32. set2=[row for row in rows if not split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja false
  33. return (set1,set2)
  34.  
  35. # Create counts of possible results (the last column of
  36. # each row is the result)
  37. def uniquecounts(rows):
  38. results={}
  39. for row in rows:
  40. # The result is the last column
  41. r=row[len(row)-1]
  42. if r not in results: results[r]=0
  43. results[r]+=1
  44. return results
  45.  
  46. # Probability that a randomly placed item will
  47. # be in the wrong category
  48. def giniimpurity(rows):
  49. total=len(rows)
  50. counts=uniquecounts(rows)
  51. imp=0
  52. for k1 in counts:
  53. p1=float(counts[k1])/total
  54. for k2 in counts:
  55. if k1==k2: continue
  56. p2=float(counts[k2])/total
  57. imp+=p1*p2
  58. return imp
  59.  
  60.  
  61. # Entropy is the sum of p(x)log(p(x)) across all
  62. # the different possible results
  63. def entropy(rows):
  64. from math import log
  65. log2=lambda x:log(x)/log(2)
  66. results=uniquecounts(rows)
  67. # Now calculate the entropy
  68. ent=0.0
  69. for r in results.keys():
  70. p=float(results[r])/len(rows)
  71. ent=ent-p*log2(p)
  72. return ent
  73.  
  74. def buildtree(rows,scoref=entropy):
  75. if len(rows)==0: return decisionnode()
  76. current_score=scoref(rows)
  77.  
  78. # Set up some variables to track the best criteria
  79. best_gain=0.0
  80. best_criteria=None
  81. best_sets=None
  82.  
  83. column_count=len(rows[0])-1
  84. for col in range(0,column_count):
  85. # Generate the list of different values in
  86. # this column
  87. column_values={}
  88. for row in rows:
  89. column_values[row[col]]=1
  90. # print
  91. # Now try dividing the rows up for each value
  92. # in this column
  93. for value in column_values.keys():
  94. (set1,set2)=divideset(rows,col,value)
  95.  
  96. # Information gain
  97. p=float(len(set1))/len(rows)
  98. gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  99. if gain>best_gain and len(set1)>0 and len(set2)>0:
  100. best_gain=gain
  101. best_criteria=(col,value)
  102. best_sets=(set1,set2)
  103.  
  104. # Create the subbranches
  105. if best_gain>0:
  106. trueBranch=buildtree(best_sets[0])
  107. falseBranch=buildtree(best_sets[1])
  108. return decisionnode(col=best_criteria[0],value=best_criteria[1],
  109. tb=trueBranch, fb=falseBranch)
  110. else:
  111. return decisionnode(results=uniquecounts(rows))
  112.  
  113. def printtree(tree,indent='',level=0):
  114. # Is this a leaf node?
  115. if tree.results!=None:
  116. print(str(tree.results))
  117. else:
  118. # Print the criteria
  119. print(str(tree.col)+':'+str(tree.value)+'? Level= (' + str(level) + ')')
  120. # Print the branches
  121. print(indent+'T->', end='')
  122. printtree(tree.tb,indent+' ',level+1)
  123. print(indent+'F->', end='')
  124. printtree(tree.fb,indent+' ',level+1)
  125.  
  126.  
  127. def classify(observation,tree):
  128. if tree.results!=None:
  129. return tree.results
  130. else:
  131. vrednost=observation[tree.col]
  132. branch=None
  133.  
  134. if isinstance(vrednost,int) or isinstance(vrednost,float):
  135. if vrednost>=tree.value: branch=tree.tb
  136. else: branch=tree.fb
  137. else:
  138. if vrednost==tree.value: branch=tree.tb
  139. else: branch=tree.fb
  140.  
  141. return classify(observation,branch)
  142.  
  143.  
  144.  
  145.  
# Training set: each row is four numeric measurements followed by the
# species label in the last column (the class the tree predicts).
# Looks like Fisher's Iris data (sepal/petal length+width) — TODO confirm.
trainingData=[
[6.3,2.9,5.6,1.8,'I. virginica'],
[6.5,3.0,5.8,2.2,'I. virginica'],
[7.6,3.0,6.6,2.1,'I. virginica'],
[4.9,2.5,4.5,1.7,'I. virginica'],
[7.3,2.9,6.3,1.8,'I. virginica'],
[6.7,2.5,5.8,1.8,'I. virginica'],
[7.2,3.6,6.1,2.5,'I. virginica'],
[6.5,3.2,5.1,2.0,'I. virginica'],
[6.4,2.7,5.3,1.9,'I. virginica'],
[6.8,3.0,5.5,2.1,'I. virginica'],
[5.7,2.5,5.0,2.0,'I. virginica'],
[5.8,2.8,5.1,2.4,'I. virginica'],
[6.4,3.2,5.3,2.3,'I. virginica'],
[6.5,3.0,5.5,1.8,'I. virginica'],
[7.7,3.8,6.7,2.2,'I. virginica'],
[7.7,2.6,6.9,2.3,'I. virginica'],
[6.0,2.2,5.0,1.5,'I. virginica'],
[6.9,3.2,5.7,2.3,'I. virginica'],
[5.6,2.8,4.9,2.0,'I. virginica'],
[7.7,2.8,6.7,2.0,'I. virginica'],
[6.3,2.7,4.9,1.8,'I. virginica'],
[6.7,3.3,5.7,2.1,'I. virginica'],
[7.2,3.2,6.0,1.8,'I. virginica'],
[6.2,2.8,4.8,1.8,'I. virginica'],
[6.1,3.0,4.9,1.8,'I. virginica'],
[6.4,2.8,5.6,2.1,'I. virginica'],
[7.2,3.0,5.8,1.6,'I. virginica'],
[7.4,2.8,6.1,1.9,'I. virginica'],
[7.9,3.8,6.4,2.0,'I. virginica'],
[6.4,2.8,5.6,2.2,'I. virginica'],
[6.3,2.8,5.1,1.5,'I. virginica'],
[6.1,2.6,5.6,1.4,'I. virginica'],
[7.7,3.0,6.1,2.3,'I. virginica'],
[6.3,3.4,5.6,2.4,'I. virginica'],
[5.1,3.5,1.4,0.2,'I. setosa'],
[4.9,3.0,1.4,0.2,'I. setosa'],
[4.7,3.2,1.3,0.2,'I. setosa'],
[4.6,3.1,1.5,0.2,'I. setosa'],
[5.0,3.6,1.4,0.2,'I. setosa'],
[5.4,3.9,1.7,0.4,'I. setosa'],
[4.6,3.4,1.4,0.3,'I. setosa'],
[5.0,3.4,1.5,0.2,'I. setosa'],
[4.4,2.9,1.4,0.2,'I. setosa'],
[4.9,3.1,1.5,0.1,'I. setosa'],
[5.4,3.7,1.5,0.2,'I. setosa'],
[4.8,3.4,1.6,0.2,'I. setosa'],
[4.8,3.0,1.4,0.1,'I. setosa'],
[4.3,3.0,1.1,0.1,'I. setosa'],
[5.8,4.0,1.2,0.2,'I. setosa'],
[5.7,4.4,1.5,0.4,'I. setosa'],
[5.4,3.9,1.3,0.4,'I. setosa'],
[5.1,3.5,1.4,0.3,'I. setosa'],
[5.7,3.8,1.7,0.3,'I. setosa'],
[5.1,3.8,1.5,0.3,'I. setosa'],
[5.4,3.4,1.7,0.2,'I. setosa'],
[5.1,3.7,1.5,0.4,'I. setosa'],
[4.6,3.6,1.0,0.2,'I. setosa'],
[5.1,3.3,1.7,0.5,'I. setosa'],
[4.8,3.4,1.9,0.2,'I. setosa'],
[5.0,3.0,1.6,0.2,'I. setosa'],
[5.0,3.4,1.6,0.4,'I. setosa'],
[5.2,3.5,1.5,0.2,'I. setosa'],
[5.2,3.4,1.4,0.2,'I. setosa'],
[5.5,2.3,4.0,1.3,'I. versicolor'],
[6.5,2.8,4.6,1.5,'I. versicolor'],
[5.7,2.8,4.5,1.3,'I. versicolor'],
[6.3,3.3,4.7,1.6,'I. versicolor'],
[4.9,2.4,3.3,1.0,'I. versicolor'],
[6.6,2.9,4.6,1.3,'I. versicolor'],
[5.2,2.7,3.9,1.4,'I. versicolor'],
[5.0,2.0,3.5,1.0,'I. versicolor'],
[5.9,3.0,4.2,1.5,'I. versicolor'],
[6.0,2.2,4.0,1.0,'I. versicolor'],
[6.1,2.9,4.7,1.4,'I. versicolor'],
[5.6,2.9,3.6,1.3,'I. versicolor'],
[6.7,3.1,4.4,1.4,'I. versicolor'],
[5.6,3.0,4.5,1.5,'I. versicolor'],
[5.8,2.7,4.1,1.0,'I. versicolor'],
[6.2,2.2,4.5,1.5,'I. versicolor'],
[5.6,2.5,3.9,1.1,'I. versicolor'],
[5.9,3.2,4.8,1.8,'I. versicolor'],
[6.1,2.8,4.0,1.3,'I. versicolor'],
[6.3,2.5,4.9,1.5,'I. versicolor'],
[6.1,2.8,4.7,1.2,'I. versicolor'],
[6.4,2.9,4.3,1.3,'I. versicolor'],
[6.6,3.0,4.4,1.4,'I. versicolor'],
[6.8,2.8,4.8,1.4,'I. versicolor'],
[6.7,3.0,5.0,1.7,'I. versicolor'],
[6.0,2.9,4.5,1.5,'I. versicolor'],
[5.7,2.6,3.5,1.0,'I. versicolor'],
[5.5,2.4,3.8,1.1,'I. versicolor'],
[5.5,2.4,3.7,1.0,'I. versicolor'],
[5.8,2.7,3.9,1.2,'I. versicolor'],
[6.0,2.7,5.1,1.6,'I. versicolor'],
[5.4,3.0,4.5,1.5,'I. versicolor'],
[6.0,3.4,4.5,1.6,'I. versicolor'],
[6.7,3.1,4.7,1.5,'I. versicolor'],
[6.3,2.3,4.4,1.3,'I. versicolor'],
[5.6,3.0,4.1,1.3,'I. versicolor'],
[5.5,2.5,4.0,1.3,'I. versicolor'],
[5.5,2.6,4.4,1.2,'I. versicolor'],
[6.1,3.0,4.6,1.4,'I. versicolor'],
[5.8,2.6,4.0,1.2,'I. versicolor'],
[5.0,2.3,3.3,1.0,'I. versicolor'],
[5.6,2.7,4.2,1.3,'I. versicolor'],
[5.7,3.0,4.2,1.2,'I. versicolor'],
[5.7,2.9,4.2,1.3,'I. versicolor'],
[6.2,2.9,4.3,1.3,'I. versicolor'],
[5.1,2.5,3.0,1.1,'I. versicolor'],
[5.7,2.8,4.1,1.3,'I. versicolor'],
[6.4,3.1,5.5,1.8,'I. virginica'],
[6.0,3.0,4.8,1.8,'I. virginica'],
[6.9,3.1,5.4,2.1,'I. virginica'],
[6.7,3.1,5.6,2.4,'I. virginica'],
[6.9,3.1,5.1,2.3,'I. virginica'],
[5.8,2.7,5.1,1.9,'I. virginica'],
[6.8,3.2,5.9,2.3,'I. virginica'],
[6.7,3.3,5.7,2.5,'I. virginica'],
[6.7,3.0,5.2,2.3,'I. virginica'],
[6.3,2.5,5.0,1.9,'I. virginica'],
[6.5,3.0,5.2,2.0,'I. virginica'],
[6.2,3.4,5.4,2.3,'I. virginica'],
[4.7,3.2,1.6,0.2,'I. setosa'],
[4.8,3.1,1.6,0.2,'I. setosa'],
[5.4,3.4,1.5,0.4,'I. setosa'],
[5.2,4.1,1.5,0.1,'I. setosa'],
[5.5,4.2,1.4,0.2,'I. setosa'],
[4.9,3.1,1.5,0.2,'I. setosa'],
[5.0,3.2,1.2,0.2,'I. setosa'],
[5.5,3.5,1.3,0.2,'I. setosa'],
[4.9,3.6,1.4,0.1,'I. setosa'],
[4.4,3.0,1.3,0.2,'I. setosa'],
[5.1,3.4,1.5,0.2,'I. setosa'],
[5.0,3.5,1.3,0.3,'I. setosa'],
[4.5,2.3,1.3,0.3,'I. setosa'],
[4.4,3.2,1.3,0.2,'I. setosa'],
[5.0,3.5,1.6,0.6,'I. setosa'],
[5.1,3.8,1.9,0.4,'I. setosa'],
[4.8,3.0,1.4,0.3,'I. setosa'],
[5.1,3.8,1.6,0.2,'I. setosa'],
[5.9,3.0,5.1,1.8,'I. virginica']
]
  289.  
  290. if __name__ == "__main__":
  291. att1=3.3#input()
  292. att2=3.2#input()
  293. att3=3.3#input()
  294. att4=1.7#input()
  295. planttype='NA'#input()
  296.  
  297. testCase=[att1,att2,att3,att4,planttype]
  298. length=len(trainingData)
  299.  
  300. trainingData1=[]
  301. trainingData2=[]
  302.  
  303. for i in range(0,length//2):
  304. trainingData1.append(trainingData[i])
  305.  
  306. for j in range(length//2,length):
  307. trainingData2.append(trainingData[j])
  308.  
  309. t1=buildtree(trainingData1)
  310. t2=buildtree(trainingData2)
  311.  
  312. printtree(t1)
  313. printtree(t2)
  314.  
  315. c1=classify(testCase,t1)
  316. c2=classify(testCase,t2)
  317.  
  318. k1=list(c1.keys())
  319. k2=list(c2.keys())
  320.  
  321. print(c1)
  322.  
  323. if k1[0] == k2[0]:
  324. print(k1[0])
  325. else:
  326. print("KONTRADIKCIJA")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement