Advertisement
NHristovski

Untitled

May 7th, 2018
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.16 KB | None | 0 0
  1. class decisionnode:
  2. def __init__(self,col=-1,value=None,results=None,tb=None,fb=None):
  3. self.col=col
  4. self.value=value
  5. self.results=results
  6. self.tb=tb
  7. self.fb=fb
  8.  
  9. def sporedi_broj(row,column,value):
  10. return row[column]>=value
  11.  
  12. def sporedi_string(row,column,value):
  13. return row[column]==value
  14.  
  15. # Divides a set on a specific column. Can handle numeric
  16. # or nominal values
  17. def divideset(rows,column,value):
  18. # Make a function that tells us if a row is in
  19. # the first group (true) or the second group (false)
  20. split_function=None
  21. if isinstance(value,int) or isinstance(value,float): # ako vrednosta so koja sporeduvame e od tip int ili float
  22. #split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  23. split_function=sporedi_broj
  24. else:
  25. # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  26. split_function=sporedi_string
  27.  
  28. # Divide the rows into two sets and return them
  29. # set1=[row for row in rows if split_function(row)] # za sekoj row od rows za koj split_function vrakja true
  30. # set2=[row for row in rows if not split_function(row)] # za sekoj row od rows za koj split_function vrakja false
  31. set1=[row for row in rows if split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja true
  32. set2=[row for row in rows if not split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja false
  33. return (set1,set2)
  34.  
  35. # Create counts of possible results (the last column of
  36. # each row is the result)
  37. def uniquecounts(rows):
  38. results={}
  39. for row in rows:
  40. # The result is the last column
  41. r=row[len(row)-1]
  42. if r not in results: results[r]=0
  43. results[r]+=1
  44. return results
  45.  
  46. # Probability that a randomly placed item will
  47. # be in the wrong category
  48. def giniimpurity(rows):
  49. total=len(rows)
  50. counts=uniquecounts(rows)
  51. imp=0
  52. for k1 in counts:
  53. p1=float(counts[k1])/total
  54. for k2 in counts:
  55. if k1==k2: continue
  56. p2=float(counts[k2])/total
  57. imp+=p1*p2
  58. return imp
  59.  
  60.  
  61. # Entropy is the sum of p(x)log(p(x)) across all
  62. # the different possible results
  63. def entropy(rows):
  64. from math import log
  65. log2=lambda x:log(x)/log(2)
  66. results=uniquecounts(rows)
  67. # Now calculate the entropy
  68. ent=0.0
  69. for r in results.keys():
  70. p=float(results[r])/len(rows)
  71. ent=ent-p*log2(p)
  72. return ent
  73.  
  74. def split_list(a_list):
  75. half = len(a_list)/2
  76. return a_list[:half], a_list[half:]
  77.  
  78. def buildTwoTrees(rows,scoref=entropy):
  79. rows1,rows2 = split_list(rows)
  80. tree1 = buildtree(rows1,scoref)
  81. tree2 = buildtree(rows2,scoref)
  82. return (tree1,tree2)
  83.  
  84. def buildtree(rows,scoref=entropy):
  85. if len(rows)==0: return decisionnode()
  86. current_score=scoref(rows)
  87.  
  88. # Set up some variables to track the best criteria
  89. best_gain=0.0
  90. best_criteria=None
  91. best_sets=None
  92.  
  93. column_count=len(rows[0])-1
  94. for col in range(0,column_count):
  95. # Generate the list of different values in
  96. # this column
  97. column_values={}
  98. for row in rows:
  99. column_values[row[col]]=1
  100. #print
  101. # Now try dividing the rows up for each value
  102. # in this column
  103. for value in column_values.keys():
  104. (set1,set2)=divideset(rows,col,value)
  105.  
  106. # Information gain
  107. p=float(len(set1))/len(rows)
  108. gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  109. if gain>best_gain and len(set1)>0 and len(set2)>0:
  110. best_gain=gain
  111. best_criteria=(col,value)
  112. best_sets=(set1,set2)
  113.  
  114. # Create the subbranches
  115. if best_gain>0:
  116. trueBranch=buildtree(best_sets[0])
  117. falseBranch=buildtree(best_sets[1])
  118. return decisionnode(col=best_criteria[0],value=best_criteria[1],
  119. tb=trueBranch, fb=falseBranch)
  120. else:
  121. return decisionnode(results=uniquecounts(rows))
  122.  
  123. def printtree(tree,indent=''):
  124. # Is this a leaf node?
  125. if tree.results!=None:
  126. print str(tree.results)
  127. else:
  128. # Print the criteria
  129. print str(tree.col)+':'+str(tree.value)+'? '
  130. # Print the branches
  131. print indent+'T->',
  132. printtree(tree.tb,indent+' ')
  133. print indent+'F->',
  134. printtree(tree.fb,indent+' ')
  135.  
  136.  
  137. def classify(observation,tree):
  138. if tree.results!=None:
  139. return tree.results
  140. else:
  141. vrednost=observation[tree.col]
  142. branch=None
  143.  
  144. if isinstance(vrednost,int) or isinstance(vrednost,float):
  145. if vrednost>=tree.value: branch=tree.tb
  146. else: branch=tree.fb
  147. else:
  148. if vrednost==tree.value: branch=tree.tb
  149. else: branch=tree.fb
  150.  
  151. return classify(observation,branch)
  152.  
  153. trainingData=[
  154. ['Ispit-0','Odmor-2','Roditeli-1','Drugari-2','FALSE'],
  155. ['Ispit-2','Odmor-2','Roditeli-1','Drugari-2','FALSE'],
  156. ['Ispit-2','Odmor-0','Roditeli-1','Drugari-1','TRUE'],
  157. ['Ispit-2','Odmor-2','Roditeli-0','Drugari-0','TRUE'],
  158. ['Ispit-2','Odmor-2','Roditeli-1','Drugari-1','FALSE'],
  159. ['Ispit-2','Odmor-0','Roditeli-1','Drugari-1','FALSE'],
  160. ['Ispit-2','Odmor-2','Roditeli-1','Drugari-0','FALSE'],
  161. ['Ispit-2','Odmor-2','Roditeli-1','Drugari-0','TRUE'],
  162. ['Ispit-2','Odmor-0','Roditeli-1','Drugari-1','TRUE'],
  163. ['Ispit-2','Odmor-2','Roditeli-1','Drugari-1','FALSE'],
  164. ['Ispit-2','Odmor-0','Roditeli-0','Drugari-1','FALSE'],
  165. ['Ispit-0','Odmor-2','Roditeli-2','Drugari-2','TRUE'],
  166. ['Ispit-2','Odmor-0','Roditeli-2','Drugari-2','TRUE'],
  167. ['Ispit-0','Odmor-2','Roditeli-0','Drugari-2','FALSE'],
  168. ['Ispit-0','Odmor-0','Roditeli-2','Drugari-0','FALSE'],
  169. ['Ispit-2','Odmor-0','Roditeli-0','Drugari-2','FALSE'],
  170. ['Ispit-2','Odmor-2','Roditeli-1','Drugari-0','TRUE'],
  171. ['Ispit-0','Odmor-0','Roditeli-2','Drugari-2','TRUE'],
  172. ['Ispit-0','Odmor-2','Roditeli-1','Drugari-1','FALSE'],
  173. ['Ispit-0','Odmor-2','Roditeli-1','Drugari-0','FALSE'],
  174. ['Ispit-2','Odmor-2','Roditeli-0','Drugari-1','TRUE'],
  175. ['Ispit-2','Odmor-0','Roditeli-2','Drugari-1','FALSE'],
  176. ['Ispit-0','Odmor-2','Roditeli-1','Drugari-1','FALSE'],
  177. ['Ispit-0','Odmor-2','Roditeli-1','Drugari-2','TRUE'],
  178. ['Ispit-2','Odmor-2','Roditeli-0','Drugari-0','FALSE']
  179. ]
  180.  
  181. if __name__ == "__main__":
  182.  
  183.  
  184.  
  185.  
  186. set1,set2 = divideset(trainingData,2,'Roditeli-0')
  187. Roditeli0 = set1
  188.  
  189. for elem in Roditeli0:
  190. print elem
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement