Advertisement
Guest User

Zad 1

a guest
Nov 18th, 2018
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.29 KB | None | 0 0
  1. trainingData=[['slashdot','USA','yes',18,'None'],
  2. ['google','France','yes',23,'Premium'],
  3. ['google','France','yes',23,'Basic'],
  4. ['google','France','yes',23,'Basic'],
  5. ['digg','USA','yes',24,'Basic'],
  6. ['kiwitobes','France','yes',23,'Basic'],
  7. ['google','UK','no',21,'Premium'],
  8. ['(direct)','New Zealand','no',12,'None'],
  9. ['(direct)','UK','no',21,'Basic'],
  10. ['google','USA','no',24,'Premium'],
  11. ['slashdot','France','yes',19,'None'],
  12. ['digg','USA','no',18,'None'],
  13. ['google','UK','no',18,'None'],
  14. ['kiwitobes','UK','no',19,'None'],
  15. ['digg','New Zealand','yes',12,'Basic'],
  16. ['slashdot','UK','no',21,'None'],
  17. ['google','UK','yes',18,'Basic'],
  18. ['kiwitobes','France','yes',19,'Basic']]
  19.  
  20. class decisionnode:
  21. #konstruktor za inicijalizacija
  22. def __init__(self,col=-1,value=None,results=None,tb=None,fb=None):
  23. self.col=col
  24. self.value=value
  25. self.results=results
  26. self.tb=tb
  27. self.fb=fb
  28.  
  29. def sporedi_broj(row,column,value): #za sporedba dali vrednosta e broj
  30. return row[column]>=value
  31.  
  32. def sporedi_string(row,column,value): #za sporedba dali vrednosta e string
  33. return row[column]==value
  34.  
  35.  
  36. def divideset(rows,column,value):
  37.  
  38. split_function=None #Flag za proverka
  39. if isinstance(value,int) or isinstance(value,float):
  40. # ako vrednosta so koja sporeduvame e od tip int ili float
  41. split_function=sporedi_broj
  42. else:
  43. # ako vrednosta so koja sporeduvame e od drug tip (string)
  44. split_function=sporedi_string
  45.  
  46.  
  47. set1=[row for row in rows if split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja true
  48. set2=[row for row in rows if not split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja false
  49. return (set1,set2)
  50.  
  51.  
  52.  
  53. def uniquecounts(rows):
  54. results={}
  55. for row in rows:
  56. r=row[len(row)-1]
  57. if r not in results:
  58. results[r]=0
  59. results[r]+=1
  60. return results
  61.  
  62. def entropy(rows):
  63. from math import log
  64. log2=lambda x:log(x)/log(2)
  65. results=uniquecounts(rows)
  66. ent=0.0
  67. for r in results.keys():
  68. p=float(results[r])/len(rows)
  69. ent=ent-p*log2(p)
  70. return ent
  71.  
  72. def buildtree(rows,scoref=entropy):
  73. if len(rows)==0: return decisionnode()
  74. current_score=scoref(rows)
  75.  
  76.  
  77. best_gain=0.0
  78. best_criteria=None
  79. best_sets=None
  80.  
  81. column_count=len(rows[0])-1
  82. for col in range(0,column_count):
  83. column_values={}
  84. for row in rows:
  85. column_values[row[col]]=1
  86. for value in column_values.keys():
  87. (set1,set2)=divideset(rows,col,value)
  88. p=float(len(set1))/len(rows)
  89. gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  90. if gain>best_gain and len(set1)>0 and len(set2)>0:
  91. best_gain=gain
  92. best_criteria=(col,value)
  93. best_sets=(set1,set2)
  94. if best_gain>0:
  95. trueBranch=buildtree(best_sets[0])
  96. falseBranch=buildtree(best_sets[1])
  97. return decisionnode(col=best_criteria[0],value=best_criteria[1],tb=trueBranch, fb=falseBranch)
  98. else:
  99. return decisionnode(results=uniquecounts(rows))
  100.  
  101. def printtree(tree,indent='',level=0):
  102. if tree.results!=None:
  103. print str(tree.results)
  104. else:
  105. # Print the criteria
  106. print str(tree.col)+':'+str(tree.value)+'? ' + 'Level=%d' % (level)
  107. # Print the branches
  108. print indent+'T->',
  109. printtree(tree.tb,indent+' ',level+1)
  110. print indent+'F->',
  111. printtree(tree.fb,indent+' ',level+1)
  112.  
  113. if __name__ == "__main__":
  114. # referrer='slashdot'
  115. # location='US'
  116. # readFAQ='no'
  117. # pagesVisited=19
  118. # serviceChosen='None'
  119.  
  120. referrer=input()
  121. location=input()
  122. readFAQ=input()
  123. pagesVisited=input()
  124. serviceChosen=input()
  125.  
  126. testCase=[referrer, location, readFAQ, pagesVisited, serviceChosen]
  127. trainingData.append(testCase)
  128. t=buildtree(trainingData)
  129. printtree(t)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement