Advertisement
Guest User

Finki SNZ lab2 zadaca 1

a guest
Oct 28th, 2016
87
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.84 KB | None | 0 0
  1. trainingData=[['slashdot','USA','yes',18,'None'],
  2.         ['google','France','yes',23,'Premium'],
  3.         ['google','France','yes',23,'Basic'],
  4.         ['google','France','yes',23,'Basic'],
  5.         ['digg','USA','yes',24,'Basic'],
  6.         ['kiwitobes','France','yes',23,'Basic'],
  7.         ['google','UK','no',21,'Premium'],
  8.         ['(direct)','New Zealand','no',12,'None'],
  9.         ['(direct)','UK','no',21,'Basic'],
  10.         ['google','USA','no',24,'Premium'],
  11.         ['slashdot','France','yes',19,'None'],
  12.         ['digg','USA','no',18,'None'],
  13.         ['google','UK','no',18,'None'],
  14.         ['kiwitobes','UK','no',19,'None'],
  15.         ['digg','New Zealand','yes',12,'Basic'],
  16.         ['slashdot','UK','no',21,'None'],
  17.         ['google','UK','yes',18,'Basic'],
  18.         ['kiwitobes','France','yes',19,'Basic']]
  19. class decisionnode:
  20.     def __init__(self, col=-1,value=None, results=None,tb=None,fb=None,level=0 ):
  21.         self.col=col
  22.         self.value=value
  23.         self.results=results
  24.         self.tb=tb
  25.         self.fb=fb
  26.         self.level=level;
  27.  
  28. def sporedi_kolona(row, column, value):
  29.     if isinstance(value, int) or isinstance(value,float):
  30.         return row[column]>=value
  31.     else:
  32.         return row[column]==value
  33.     #-----------------------------------------------------------------------
  34. def divideset (rows, column, value):
  35.     split_function=sporedi_kolona
  36.     s1=[]
  37.     s2=[]
  38.  
  39.     for row in rows:
  40.         if split_function(row,column,value):
  41.             s1.append(row)
  42.         else:
  43.             s2.append(row)
  44.     return (s1,s2)
  45. def uniquecounts (rows):
  46.     results={}
  47.     for row in rows:
  48.         r=row[len(row)-1]
  49.         if r not in results:
  50.             results[r]=0
  51.         results[r]+=1
  52.     return results
  53. def Log2 (x):
  54.     from math import log
  55.     return log(x)/log(2)
  56. def entropy (rows):
  57.     results=uniquecounts(rows)
  58.     ent=0.0
  59.     for key in results.keys():
  60.         p=float(results[key])/len(rows)
  61.         ent=ent-p*Log2(p)
  62.  
  63.     return ent
  64. def buildtree(br,rows, scoref=entropy):
  65.     if len(rows)==0:return decisionnode()
  66.     current_score=scoref(rows)
  67.  
  68.     best_gain=0.0
  69.     best_criteria=None
  70.     best_sets=None
  71.  
  72.     column_count=len(rows[0])-1;
  73.     for col in range(0,column_count):
  74.         column_values={}
  75.         for row in rows:
  76.             column_values[row[col]]=1
  77.         for value in column_values.keys():
  78.             (set1,set2)=divideset(rows,col,value)
  79.             p=float(len(set1))/len(rows)
  80.             gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
  81.             if gain > best_gain and len(set1) > 0 and len(set2) > 0:
  82.                 best_gain = gain
  83.                 best_criteria = (col, value)
  84.                 best_sets = (set1, set2)
  85.     br=br+1;
  86.     if best_gain > 0:
  87.         trueBranch = buildtree(br, best_sets[0])
  88.         falseBranch = buildtree(br, best_sets[1])
  89.         return decisionnode(col=best_criteria[0], value=best_criteria[1],
  90.                             tb=trueBranch, fb=falseBranch, level=br)
  91.     else:
  92.         return decisionnode(results=uniquecounts(rows), level=br)
  93.  
  94. def printtree (tree,indent=''):
  95.     if tree.results!=None:
  96.         print str(tree.results)
  97.     else:
  98.         print str(tree.col)+':'+str(tree.value)+'?'+" Level=" + str(tree.level)
  99.         print indent + 'T->',
  100.         printtree(tree.tb, indent + '  ')
  101.         print indent + 'F->',
  102.         printtree(tree.fb, indent + '  ')
  103.  
  104.  
  105. if __name__ == "__main__":
  106.     # referrer='slashdot'
  107.     # location='US'
  108.     # readFAQ='no'
  109.     # pagesVisited=19
  110.     # serviceChosen='None'
  111.  
  112.     referrer=input()
  113.     location=input()
  114.     readFAQ=input()
  115.     pagesVisited=input()
  116.     serviceChosen=input()
  117.  
  118.     testCase=[referrer, location, readFAQ, pagesVisited, serviceChosen]
  119.     trainingData.append(testCase)
  120.     t=buildtree(-1,trainingData)
  121.     printtree(t)
  122.     #print ("da")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement