• API
• FAQ
• Tools
• Archive
daily pastebin goal
45%
SHARE
TWEET

# Untitled

a guest Jan 22nd, 2018 95 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Sample rows used to exercise the decision-tree code in this file.
# Each row is [referrer, location, yes/no flag, number, service label].
# The names "referrer", "location", "pagesVisited" and "serviceChosen" appear
# as commented-out variables in the __main__ section; presumably the numeric
# column is pages visited -- the meaning of the yes/no column is not stated
# anywhere in this file.
# The last column is the label that uniquecounts() tallies.
# NOTE(review): the line numbering of the paste this was recovered from had
# gaps inside this literal, so rows may be missing -- confirm against the
# original data set.
trainingData = [
    ['slashdot', 'USA', 'yes', 18, 'None'],
    ['digg', 'USA', 'yes', 24, 'Basic'],
    ['kiwitobes', 'France', 'yes', 23, 'Basic'],
    ['(direct)', 'New Zealand', 'no', 12, 'None'],
    ['(direct)', 'UK', 'no', 21, 'Basic'],
    ['slashdot', 'France', 'yes', 19, 'None'],
    ['digg', 'USA', 'no', 18, 'None'],
    ['kiwitobes', 'UK', 'no', 19, 'None'],
    ['digg', 'New Zealand', 'yes', 12, 'Basic'],
    ['slashdot', 'UK', 'no', 21, 'None'],
    ['kiwitobes', 'France', 'yes', 19, 'Basic'],
]
19.
class decisionnode:
    """A single node of a binary decision tree.

    buildtree() creates two kinds of node: an internal node, which carries a
    split criterion (col, value) and two children (tb, fb), and a leaf, which
    carries only ``results``.
    """

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        """Store the node's fields unchanged.

        col     -- index of the column tested at this node (-1 when unset)
        value   -- value the column is compared against
        results -- for a leaf, the label counts; None for an internal node
        tb      -- child node followed when the test is true
        fb      -- child node followed when the test is false
        """
        self.col = col
        self.value = value
        self.results = results
        self.tb = tb
        self.fb = fb
28.
def sporedi_broj(row, column, value):
    """Numeric split test: return True when row[column] is >= value.

    ("sporedi broj" is Macedonian for "compare number".)
    """
    return row[column] >= value
31.
def sporedi_string(row, column, value):
    """Equality split test: return True when row[column] equals value.

    ("sporedi string" is Macedonian for "compare string".)
    """
    return row[column] == value
34.
def divideset(rows, column, value):
    """Partition rows into two lists according to one column.

    When ``value`` is an int or float the test is ``row[column] >= value``
    (sporedi_broj); for any other value it is ``row[column] == value``
    (sporedi_string).

    Returns a tuple ``(set1, set2)``: set1 holds the rows for which the test
    is true, set2 the rows for which it is false. Row order is preserved in
    both lists.
    """
    if isinstance(value, (int, float)):
        split_function = sporedi_broj
    else:
        split_function = sporedi_string

    # Single pass: the test is evaluated once per row instead of once for
    # each of the two output lists.
    set1 = []  # rows for which split_function returns true
    set2 = []  # rows for which split_function returns false
    for row in rows:
        if split_function(row, column, value):
            set1.append(row)
        else:
            set2.append(row)
    return (set1, set2)
49.
50.
51.
def uniquecounts(rows):
    """Count how often each label occurs.

    The label of a row is its last element. Returns a plain dict mapping
    each label to its number of occurrences; an empty ``rows`` gives ``{}``.
    """
    results = {}
    for row in rows:
        label = row[-1]  # the result is the last column
        results[label] = results.get(label, 0) + 1
    return results
61.
62.
def entropy(rows):
    """Return the base-2 entropy of the labels in rows.

    Labels are tallied with uniquecounts() (last element of each row). The
    result is the sum of ``-p * log2(p)`` over the labels, where ``p`` is the
    label's share of the rows. An empty ``rows`` yields 0.0 because there are
    no labels to sum over.
    """
    from math import log

    log_of_2 = log(2)  # hoisted: constant divisor for the base-2 conversion
    total = len(rows)
    ent = 0.0
    for count in uniquecounts(rows).values():
        p = float(count) / total
        ent = ent - p * (log(p) / log_of_2)
    return ent
73.
def buildtree(rows, scoref=entropy):
    """Recursively build a decision tree from rows.

    rows   -- sequence of rows; the last element of each row is the label
    scoref -- function mapping a list of rows to an impurity score

    Every (column, value) pair that occurs in the data is tried as a split
    via divideset(); the split with the largest positive gain
    ``score(rows) - p*score(set1) - (1-p)*score(set2)`` is kept, and the two
    halves are built recursively with the same ``scoref``. If no split has a
    positive gain, a leaf holding uniquecounts(rows) is returned. An empty
    ``rows`` gives a default decisionnode().
    """
    if len(rows) == 0:
        return decisionnode()
    current_score = scoref(rows)

    best_gain = 0.0
    best_criteria = None
    best_sets = None

    column_count = len(rows[0]) - 1  # the last column is the label
    for col in range(column_count):
        # Distinct values of this column. A dict is kept (rather than a set)
        # so the order in which candidates are tried, and therefore how ties
        # between equal gains are broken, is unchanged.
        column_values = {}
        for row in rows:
            column_values[row[col]] = 1

        for value in column_values:
            set1, set2 = divideset(rows, col, value)
            # A split that leaves one side empty can never be chosen, so
            # skip it before spending time scoring it.
            if not set1 or not set2:
                continue

            p = float(len(set1)) / len(rows)
            gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
            if gain > best_gain:
                best_gain = gain
                best_criteria = (col, value)
                best_sets = (set1, set2)

    if best_gain > 0:
        # Pass scoref down so a caller-supplied scoring function is used for
        # the whole tree, not only for the root split.
        trueBranch = buildtree(best_sets[0], scoref)
        falseBranch = buildtree(best_sets[1], scoref)
        return decisionnode(col=best_criteria[0], value=best_criteria[1],
                            tb=trueBranch, fb=falseBranch)
    return decisionnode(results=uniquecounts(rows))
109.
def printtree(tree, indent='', level=0):
    """Write a text rendering of the tree to standard output.

    tree   -- node with ``results``, ``col``, ``value``, ``tb`` and ``fb``
    indent -- prefix written before the 'T->' / 'F->' branch markers
    level  -- depth of ``tree``, shown as 'Level=N' on internal nodes

    A leaf is written as ``str(tree.results)``. An internal node is written
    as 'col:value? Level=N', followed by its true branch after 'T-> ' and its
    false branch after 'F-> ', each one level deeper and indented two more
    spaces.
    """
    # sys.stdout.write is used instead of print so the same source runs on
    # Python 2 and Python 3 and produces identical output on both.
    import sys
    write = sys.stdout.write

    if tree.results is not None:
        write(str(tree.results) + '\n')
        return

    # Print the criteria
    write(str(tree.col) + ':' + str(tree.value) + '? ' + 'Level=%d' % (level) + '\n')
    # Print the branches; the marker and the subtree's first line share a line
    write(indent + 'T-> ')
    printtree(tree.tb, indent + '  ', level + 1)
    write(indent + 'F-> ')
    printtree(tree.fb, indent + '  ', level + 1)
122.
123. if __name__ == "__main__":
124.     # referrer='slashdot'
125.     # location='US'
127.     # pagesVisited=19
128.     # serviceChosen='None'
129.
130.     referrer=input()
131.     location=input()