daily pastebin goal
9%
SHARE
TWEET

Untitled

a guest Jan 22nd, 2018 85 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. trainingData=[['slashdot','USA','yes',18,'None'],
  2.         ['google','France','yes',23,'Premium'],
  3.         ['google','France','yes',23,'Basic'],
  4.         ['google','France','yes',23,'Basic'],
  5.         ['digg','USA','yes',24,'Basic'],
  6.         ['kiwitobes','France','yes',23,'Basic'],
  7.         ['google','UK','no',21,'Premium'],
  8.         ['(direct)','New Zealand','no',12,'None'],
  9.         ['(direct)','UK','no',21,'Basic'],
  10.         ['google','USA','no',24,'Premium'],
  11.         ['slashdot','France','yes',19,'None'],
  12.         ['digg','USA','no',18,'None'],
  13.         ['google','UK','no',18,'None'],
  14.         ['kiwitobes','UK','no',19,'None'],
  15.         ['digg','New Zealand','yes',12,'Basic'],
  16.         ['slashdot','UK','no',21,'None'],
  17.         ['google','UK','yes',18,'Basic'],
  18.         ['kiwitobes','France','yes',19,'Basic']]
  19.  
  20. class decisionnode:
  21.    
  22.       def __init__(self,col=-1,value=None,results=None,tb=None,fb=None):
  23.          self.col=col
  24.          self.value=value
  25.          self.results=results
  26.          self.tb=tb
  27.          self.fb=fb
  28.  
  29. def sporedi_broj(row,column,value):
  30.   return row[column]>=value
  31.  
  32. def sporedi_string(row,column,value):
  33.   return row[column]==value
  34.  
  35. def divideset(rows,column,value):
  36.    
  37.     split_function=None
  38.     if isinstance(value,int) or isinstance(value,float):
  39.      
  40.        split_function=sporedi_broj
  41.     else:
  42.      
  43.        split_function=sporedi_string
  44.  
  45.    
  46.     set1=[row for row in rows if split_function(row,column,value)]  # za sekoj row od rows za koj split_function vrakja true
  47.     set2=[row for row in rows if not split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja false
  48.     return (set1,set2)
  49.  
  50.  
  51.  
  52. def uniquecounts(rows):
  53.   results={}
  54.   for row in rows:
  55.      # The result is the last column
  56.      r=row[len(row)-1]
  57.      if r not in results:
  58.             results[r]=0
  59.      results[r]+=1
  60.   return results
  61.  
  62.  
  63. def entropy(rows):
  64.       from math import log
  65.       log2=lambda x:log(x)/log(2)
  66.       results=uniquecounts(rows)
  67.       # Now calculate the entropy
  68.       ent=0.0
  69.       for r in results.keys():
  70.             p=float(results[r])/len(rows)
  71.             ent=ent-p*log2(p)
  72.       return ent
  73.  
  74. def buildtree(rows,scoref=entropy):
  75.       if len(rows)==0: return decisionnode()
  76.       current_score=scoref(rows)
  77.      
  78.    
  79.       best_gain=0.0
  80.       best_criteria=None
  81.       best_sets=None
  82.  
  83.       column_count=len(rows[0])-1
  84.       for col in range(0,column_count):
  85.            
  86.             column_values={}
  87.             for row in rows:
  88.                   column_values[row[col]]=1
  89.                  
  90.            
  91.             for value in column_values.keys():
  92.                   (set1,set2)=divideset(rows,col,value)
  93.  
  94.                  
  95.                   p=float(len(set1))/len(rows)
  96.                   gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  97.                   if gain>best_gain and len(set1)>0 and len(set2)>0:
  98.                         best_gain=gain
  99.                         best_criteria=(col,value)
  100.                         best_sets=(set1,set2)
  101.  
  102.      
  103.       if best_gain>0:
  104.             trueBranch=buildtree(best_sets[0])
  105.             falseBranch=buildtree(best_sets[1])
  106.             return decisionnode(col=best_criteria[0],value=best_criteria[1],tb=trueBranch, fb=falseBranch)
  107.       else:
  108.             return decisionnode(results=uniquecounts(rows))
  109.  
  110. def printtree(tree,indent='',level=0):
  111.      
  112.       if tree.results!=None:
  113.             print str(tree.results)
  114.       else:
  115.             # Print the criteria
  116.             print str(tree.col)+':'+str(tree.value)+'? ' + 'Level=%d' % (level)
  117.             # Print the branches
  118.             print indent+'T->',
  119.             printtree(tree.tb,indent+'  ',level+1)
  120.             print indent+'F->',
  121.             printtree(tree.fb,indent+'  ',level+1)        
  122.  
  123. if __name__ == "__main__":
  124.     # referrer='slashdot'
  125.     # location='US'
  126.     # readFAQ='no'
  127.     # pagesVisited=19
  128.     # serviceChosen='None'
  129.  
  130.     referrer=input()
  131.     location=input()
  132.     readFAQ=input()
  133.     pagesVisited=input()
  134.     serviceChosen=input()
  135.  
  136.     testCase=[referrer, location, readFAQ, pagesVisited, serviceChosen]
  137.     trainingData.append(testCase)
  138.     t=buildtree(trainingData)
  139.     printtree(t)
RAW Paste Data
Top