Advertisement
Guest User

Untitled

a guest
Oct 22nd, 2016
96
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.27 KB | None | 0 0
  1. trainingData=[['slashdot','USA','yes',18,'None'],
  2.         ['google','France','yes',23,'Premium'],
  3.         ['google','France','yes',23,'Basic'],
  4.         ['google','France','yes',23,'Basic'],
  5.         ['digg','USA','yes',24,'Basic'],
  6.         ['kiwitobes','France','yes',23,'Basic'],
  7.         ['google','UK','no',21,'Premium'],
  8.         ['(direct)','New Zealand','no',12,'None'],
  9.         ['(direct)','UK','no',21,'Basic'],
  10.         ['google','USA','no',24,'Premium'],
  11.         ['slashdot','France','yes',19,'None'],
  12.         ['digg','USA','no',18,'None'],
  13.         ['google','UK','no',18,'None'],
  14.         ['kiwitobes','UK','no',19,'None'],
  15.         ['digg','New Zealand','yes',12,'Basic'],
  16.         ['slashdot','UK','no',21,'None'],
  17.         ['google','UK','yes',18,'Basic'],
  18.         ['kiwitobes','France','yes',19,'Basic']]
  19. class decisionnode:
  20.       def __init__(self,col=-1,value=None,results=None,tb=None,fb=None,level=0):
  21.          self.col=col
  22.          self.value=value
  23.          self.results=results
  24.          self.tb=tb
  25.          self.fb=fb
  26.          self.level=level
  27.  
  28. def sporedi_broj(row,column,value):
  29.   return row[column]>=value
  30.  
  31. def sporedi_string(row,column,value):
  32.   return row[column]==value
  33.  
  34. # Divides a set on a specific column. Can handle numeric
  35. # or nominal values
  36. def divideset(rows,column,value):
  37.     # Make a function that tells us if a row is in
  38.     # the first group (true) or the second group (false)
  39.     split_function=None
  40.     if isinstance(value,int) or isinstance(value,float): # ako vrednosta so koja sporeduvame e od tip int ili float
  41.        #split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  42.        split_function=sporedi_broj
  43.     else:
  44.        # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  45.        split_function=sporedi_string
  46.  
  47.     # Divide the rows into two sets and return them
  48.     # set1=[row for row in rows if split_function(row)]  # za sekoj row od rows za koj split_function vrakja true
  49.     # set2=[row for row in rows if not split_function(row)] # za sekoj row od rows za koj split_function vrakja false
  50.     set1=[row for row in rows if split_function(row,column,value)]  # za sekoj row od rows za koj split_function vrakja true
  51.     set2=[row for row in rows if not split_function(row,column,value)] # za sekoj row od rows za koj split_function vrakja false
  52.     return (set1,set2)
  53.  
  54. # Create counts of possible results (the last column of
  55. # each row is the result)
  56. def uniquecounts(rows):
  57.   results={}
  58.   for row in rows:
  59.      # The result is the last column
  60.      r=row[len(row)-1]
  61.      if r not in results: results[r]=0
  62.      results[r]+=1
  63.   return results
  64.  
  65. # Probability that a randomly placed item will
  66. # be in the wrong category
  67. def giniimpurity(rows):
  68.       total=len(rows)
  69.       counts=uniquecounts(rows)
  70.       imp=0
  71.       for k1 in counts:
  72.             p1=float(counts[k1])/total
  73.             for k2 in counts:
  74.                   if k1==k2: continue
  75.                   p2=float(counts[k2])/total
  76.                   imp+=p1*p2
  77.       return imp
  78.  
  79.  
  80. # Entropy is the sum of p(x)log(p(x)) across all
  81. # the different possible results
  82. def entropy(rows):
  83.       from math import log
  84.       log2=lambda x:log(x)/log(2)
  85.       results=uniquecounts(rows)
  86.       # Now calculate the entropy
  87.       ent=0.0
  88.       for r in results.keys():
  89.             p=float(results[r])/len(rows)
  90.             ent=ent-p*log2(p)
  91.       return ent
  92.  
  93. def buildtree(rows,scoref=entropy,level=0):
  94.       if len(rows)==0: return decisionnode()
  95.       current_score=scoref(rows)
  96.  
  97.       # Set up some variables to track the best criteria
  98.       best_gain=0.0
  99.       best_criteria=None
  100.       best_sets=None
  101.  
  102.       column_count=len(rows[0])-1
  103.       for col in range(0,column_count):
  104.             # Generate the list of different values in
  105.             # this column
  106.             column_values={}
  107.             for row in rows:
  108.                   column_values[row[col]]=1
  109.             # Now try dividing the rows up for each value
  110.             # in this column
  111.             for value in column_values.keys():
  112.                   (set1,set2)=divideset(rows,col,value)
  113.  
  114.                   # Information gain
  115.                   p=float(len(set1))/len(rows)
  116.                   gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  117.                   if gain>best_gain and len(set1)>0 and len(set2)>0:
  118.                         best_gain=gain
  119.                         best_criteria=(col,value)
  120.                         best_sets=(set1,set2)
  121.  
  122.       # Create the subbranches
  123.       if best_gain>0:
  124.             trueBranch=buildtree(best_sets[0],level=level+1)
  125.             falseBranch=buildtree(best_sets[1],level=level+1)
  126.  
  127.             return decisionnode(col=best_criteria[0],value=best_criteria[1],
  128.                             tb=trueBranch, fb=falseBranch, level=level)
  129.       else:
  130.             return decisionnode(results=uniquecounts(rows))
  131.  
  132. def printtree(tree,indent=''):
  133.       # Is this a leaf node?
  134.       if tree.results!=None:
  135.             print str(tree.results)
  136.       else:
  137.             # Print the criteria
  138.             print str(tree.col)+':'+str(tree.value)+'? ' + 'Level='+str(tree.level)
  139.             # Print the branches
  140.             print indent+'T->',
  141.             printtree(tree.tb,indent+'  ')
  142.             print indent+'F->',
  143.             printtree(tree.fb,indent+'  ')
  144.  
  145.  
  146.  
  147. def classify(observation,tree):
  148.     if tree.results!=None:
  149.         return tree.results
  150.     else:
  151.         vrednost=observation[tree.col]
  152.         branch=None
  153.  
  154.         if isinstance(vrednost,int) or isinstance(vrednost,float):
  155.             if vrednost>=tree.value: branch=tree.tb
  156.             else: branch=tree.fb
  157.         else:
  158.            if vrednost==tree.value: branch=tree.tb
  159.            else: branch=tree.fb
  160.  
  161.         return classify(observation,branch)
  162.  
  163.  
  164. if __name__ == "__main__":
  165.     # referrer='slashdot'
  166.     # location='US'
  167.     # readFAQ='no'
  168.     # pagesVisited=19
  169.     # serviceChosen='None'
  170.  
  171.     referrer=input()
  172.     location=input()
  173.     readFAQ=input()
  174.     pagesVisited=input()
  175.     serviceChosen=input()
  176.  
  177.     testCase=[referrer, location, readFAQ, pagesVisited, serviceChosen]
  178.     trainingData.append(testCase)
  179.     t=buildtree(trainingData)
  180.     printtree(t)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement