Advertisement
Guest User

kiki

a guest
Jan 23rd, 2019
145
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.22 KB | None | 0 0
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3.  
  4. import re
  5. import math
  6.  
  7. trainingData=[['slashdot','USA','yes',18,'None'],
  8. ['google','France','yes',23,'Premium'],
  9. ['google','France','yes',23,'Basic'],
  10. ['google','France','yes',23,'Basic'],
  11. ['digg','USA','yes',24,'Basic'],
  12. ['kiwitobes','France','yes',23,'Basic'],
  13. ['google','UK','no',21,'Premium'],
  14. ['(direct)','New Zealand','no',12,'None'],
  15. ['(direct)','UK','no',21,'Basic'],
  16. ['google','USA','no',24,'Premium'],
  17. ['slashdot','France','yes',19,'None'],
  18. ['digg','USA','no',18,'None'],
  19. ['google','UK','no',18,'None'],
  20. ['kiwitobes','UK','no',19,'None'],
  21. ['digg','New Zealand','yes',12,'Basic'],
  22. ['slashdot','UK','no',21,'None'],
  23. ['google','UK','yes',18,'Basic'],
  24. ['kiwitobes','France','yes',19,'Basic']]
  25.  
  26. class decisionnode(object):
  27. def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
  28. self.col = col
  29. self.value = value
  30. self.results = results
  31. self.tb = tb
  32. self.fb = fb
  33.  
  34. def sporedi_broj(value1, value2):
  35. return value1 >= value2
  36.  
  37.  
  38. def sporedi_string(value1, value2):
  39. return value1 == value2
  40.  
  41.  
  42.  
  43. def divideset(rows, column, value):
  44. sporedi = get_compare_func(value)
  45. # print(split_function)
  46. # Divide the rows into two sets and return them
  47. set_false = []
  48. set_true = []
  49. for row in rows:
  50. uslov=sporedi(row[column], value)
  51. # print(column, value, row[column], uslov, row)
  52. if uslov:
  53. set_true.append(row)
  54. else:
  55. set_false.append(row)
  56. # print(len(set_true), len(set_false))
  57. # set_true = [row for row in rows if
  58. # split_function(row, column, value)] # za sekoj row od rows za koj split_function vrakja true
  59. # set_false = [row for row in rows if
  60. # not split_function(row, column, value)] # za sekoj row od rows za koj split_function vrakja false
  61. return (set_true, set_false)
  62.  
  63.  
  64. def uniquecounts(rows):
  65. d={}
  66. for r in rows:
  67. # print(r[-1])
  68. d.setdefault(r[-1], 0)
  69. d[r[-1]]+=1
  70. return d
  71.  
  72. def entropy(rows):
  73. results = uniquecounts(rows)
  74. # Now calculate the entropy
  75. ent = 0.0
  76. n = len(rows)
  77. for label, cnt in results.items():
  78. # print(r)
  79. p = float(cnt) / n
  80. # print(label, cnt, p)
  81. ent -= p * log2(p)
  82. return ent
  83.  
  84. def info_gain(current_score, sets, scoref=entropy):
  85. m = sum([len(s) for s in sets])
  86. gain = current_score
  87. for s in sets:
  88. n=len(s)
  89. p=1.*n/m
  90. gain -= p*scoref(s)
  91. return gain
  92.  
  93. def buildtree(rows, scoref=entropy):
  94. if len(rows) == 0:
  95. return decisionnode()
  96. current_score = scoref(rows)
  97.  
  98. # Set up some variables to track the best criteria
  99. best_gain = 0.0
  100. best_column = -1
  101. best_value = None
  102. best_subsetf = None
  103. best_subsett = None
  104.  
  105. column_count = len(rows[0]) - 1
  106. for col in range(column_count):
  107. # Generate the list of different values in
  108. # this column
  109. # column_values = set()
  110. # for row in rows:
  111. # column_values.add(row[col])
  112. # print(column_values)
  113. column_values = set([row[col] for row in rows])
  114. # print('Zemame vo predvid podelba po:', col, len(column_values), column_values)
  115. # continue
  116. # Now try dividing the rows up for each value
  117. # in this column
  118. for value in column_values:
  119. sets = divideset(rows, col, value)
  120.  
  121. # Information gain
  122. # p = float(len(set1)) / len(rows)
  123. # gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
  124. gain = info_gain(current_score, sets, scoref)
  125. if gain > best_gain and len(sets)>0 and len(sets[0]) > 0 and len(sets[1]) > 0:
  126. best_gain = gain
  127. best_column = col
  128. best_value = value
  129. best_subsett = sets[0]
  130. best_subsetf = sets[1]
  131. # best_criteria = (col, value)
  132. # best_sets = (set1, set2)
  133. # print('Dividing dataset', col, value, gain, sets)
  134. # pronajden e korenot
  135. # return
  136. # Create the subbranches
  137. if best_gain > 0:
  138. # print(best_subsett)
  139. # print(best_subsetf)
  140. print(best_column, best_value, best_gain)
  141. print('Starting true subbranch')
  142. trueBranch = buildtree(best_subsett, scoref)
  143. print()
  144. print('Starting false subbranch')
  145. falseBranch = buildtree(best_subsetf, scoref)
  146. print()
  147. return decisionnode(col=best_column, value=best_value,
  148. tb=trueBranch, fb=falseBranch)
  149.  
  150. else:
  151. print('Terminalen jazol')
  152. print()
  153. return decisionnode(results=uniquecounts(rows))
  154.  
  155. def printtree(tree, indent=''):
  156. # Is this a leaf node?
  157. if tree.results != None:
  158. print(indent + str(sorted(tree.results.items())))
  159. else:
  160. # Print the criteria
  161. print(indent + str(tree.col) + ':' + str(tree.value) + '? ')
  162. # Print the branches
  163. print(indent + 'T->')
  164. printtree(tree.tb, indent + ' ')
  165. print(indent + 'F->')
  166. printtree(tree.fb, indent + ' ')
  167.  
  168.  
  169.  
  170.  
  171. def classify(observation, tree):
  172. if tree.results != None:
  173. sortirano=sorted(tree.results.items())
  174. return sortirano[0][0]
  175.  
  176.  
  177. else:
  178. vrednost = observation[tree.col]
  179. if compare_values(vrednost, tree.value):
  180. branch = tree.tb
  181. else:
  182. branch = tree.fb
  183. return classify(observation, branch)
  184.  
  185.  
  186.  
  187.  
  188. if __name__ == "__main__":
  189. # referrer='slashdot'
  190. # location='UK'
  191. # readFAQ='no'
  192. # pagesVisited=21
  193. # serviceChosen='Unknown'
  194.  
  195. referrer=input()
  196. location=input()
  197. readFAQ=input()
  198. pagesVisited=input()
  199. serviceChosen=input()
  200.  
  201. testCase=[referrer,location,readFAQ,pagesVisited,serviceChosen]
  202. buildtree(trainingData)
  203. klasa=classify(testCase,tree)
  204. print klasa
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement