Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Labelled records used to grow the decision tree.  Each row is
# [referrer, location, read FAQ, pages visited, service chosen]; the last
# column is the class label that the tree learns to predict.
trainingData = [
    ['slashdot', 'USA', 'yes', 18, 'None'],
    ['google', 'France', 'yes', 23, 'Premium'],
    ['google', 'France', 'yes', 23, 'Basic'],
    ['google', 'France', 'yes', 23, 'Basic'],
    ['digg', 'USA', 'yes', 24, 'Basic'],
    ['kiwitobes', 'France', 'yes', 23, 'Basic'],
    ['google', 'UK', 'no', 21, 'Premium'],
    ['(direct)', 'New Zealand', 'no', 12, 'None'],
    ['(direct)', 'UK', 'no', 21, 'Basic'],
    ['google', 'USA', 'no', 24, 'Premium'],
    ['slashdot', 'France', 'yes', 19, 'None'],
    ['digg', 'USA', 'no', 18, 'None'],
    ['google', 'UK', 'no', 18, 'None'],
    ['kiwitobes', 'UK', 'no', 19, 'None'],
    ['digg', 'New Zealand', 'yes', 12, 'Basic'],
    ['slashdot', 'UK', 'no', 21, 'None'],
    ['google', 'UK', 'yes', 18, 'Basic'],
    ['kiwitobes', 'France', 'yes', 19, 'Basic'],
]
class decisionnode:
    """One node of a binary decision tree.

    A node is either an internal test (``col``/``value`` with two subtrees)
    or a leaf (``results`` set, no subtrees).
    """

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None,
                 level=0):
        """Store the node's fields unchanged.

        Args:
            col: Index of the column this node tests (-1 when unset).
            value: Value the column is compared against.
            results: Mapping of class label to count for a leaf node;
                None for an internal node.
            tb: Subtree followed when the test is true.
            fb: Subtree followed when the test is false.
            level: Depth marker assigned by the tree builder.
        """
        self.col = col
        self.value = value
        self.results = results
        self.tb = tb
        self.fb = fb
        self.level = level
def sporedi_kolona(row, column, value):
    """Test a single row against a split criterion.

    Args:
        row: Sequence of field values.
        column: Index of the field to test.
        value: Criterion value.

    Returns:
        For an int or float criterion, whether ``row[column] >= value``;
        for any other criterion, whether ``row[column] == value``.
    """
    if isinstance(value, (int, float)):
        return row[column] >= value
    return row[column] == value
#-----------------------------------------------------------------------
def divideset(rows, column, value):
    """Partition rows by a split criterion on one column.

    Args:
        rows: Iterable of rows (sequences of field values).
        column: Index of the field to test.
        value: Criterion value handed to ``sporedi_kolona``.

    Returns:
        A tuple ``(matching, non_matching)`` of two lists; each keeps the
        rows in their original relative order.
    """
    matching = []
    non_matching = []
    for row in rows:
        if sporedi_kolona(row, column, value):
            matching.append(row)
        else:
            non_matching.append(row)
    return (matching, non_matching)
def uniquecounts(rows):
    """Count how often each class label occurs.

    The label of a row is its last element.

    Args:
        rows: Iterable of non-empty rows.

    Returns:
        A plain dict mapping each label to its number of occurrences, with
        keys in first-seen order (on Pythons whose dicts preserve order).
    """
    counts = {}
    for row in rows:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    return counts
def Log2(x):
    """Return the base-2 logarithm of x.

    Computed as ``log(x) / log(2)`` with the natural logarithm from the
    ``math`` module, so non-positive x raises ValueError.
    """
    from math import log
    return log(x) / log(2)
def entropy(rows):
    """Return the entropy (in bits) of the class labels in rows.

    Sums ``-p * Log2(p)`` over every distinct label, where p is the
    fraction of rows carrying that label.  An empty ``rows`` has no labels
    and therefore yields 0.0.

    Args:
        rows: Sized collection of rows whose last element is the label.
    """
    total = len(rows)
    ent = 0.0
    for count in uniquecounts(rows).values():
        p = float(count) / total
        ent = ent - p * Log2(p)
    return ent
def buildtree(br, rows, scoref=entropy):
    """Recursively grow a decision tree from labelled rows.

    Every (column, value) pair occurring in ``rows`` is tried as a split;
    the split with the largest positive information gain is kept and both
    halves are built recursively.  When no split improves the score, a leaf
    holding the label counts is returned.

    Args:
        br: Level counter of the parent.  The node created by this call is
            tagged with ``br + 1``, so passing -1 gives the root level 0.
        rows: List of rows; the last element of each row is its label.
        scoref: Function scoring the impurity of a list of rows.  It is
            forwarded to the recursive calls so the whole tree is built
            with the same measure.

    Returns:
        A ``decisionnode``.  Empty ``rows`` gives a default-constructed
        node (no results, no children, level 0).
    """
    if not rows:
        return decisionnode()

    current_score = scoref(rows)
    best_gain = 0.0
    best_criteria = None
    best_sets = None

    # The last column is the label, so it is never used as a split column.
    column_count = len(rows[0]) - 1
    for col in range(column_count):
        # A dict (not a set) keeps the distinct values in first-seen order,
        # which keeps tie-breaking between equally good splits stable.
        column_values = dict.fromkeys(row[col] for row in rows)
        for value in column_values:
            set1, set2 = divideset(rows, col, value)
            # A split that leaves one side empty separates nothing; skip it
            # before scoring so scoref never sees an empty set.
            if not set1 or not set2:
                continue
            p = float(len(set1)) / len(rows)
            gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
            if gain > best_gain:
                best_gain = gain
                best_criteria = (col, value)
                best_sets = (set1, set2)

    br = br + 1
    if best_gain > 0:
        # Pass scoref down explicitly; otherwise the subtrees would fall
        # back to the default measure regardless of what the caller chose.
        trueBranch = buildtree(br, best_sets[0], scoref)
        falseBranch = buildtree(br, best_sets[1], scoref)
        return decisionnode(col=best_criteria[0], value=best_criteria[1],
                            tb=trueBranch, fb=falseBranch, level=br)
    return decisionnode(results=uniquecounts(rows), level=br)
def printtree(tree, indent=''):
    """Write a text rendering of a decision tree to standard output.

    A leaf is printed as the ``str`` of its ``results``.  An internal node
    is printed as ``<col>:<value>? Level=<level>`` followed by its true
    branch (prefixed ``T-> ``) and its false branch (prefixed ``F-> ``),
    each with the indent extended by one space.

    Args:
        tree: Node exposing ``results``, and for internal nodes ``col``,
            ``value``, ``level``, ``tb`` and ``fb``.
        indent: Prefix written before the branch markers of this node.
    """
    # sys.stdout.write yields the same bytes on Python 2 and Python 3,
    # unlike the print statement (a syntax error on Python 3).
    import sys
    write = sys.stdout.write

    if tree.results is not None:
        write(str(tree.results) + '\n')
        return

    write(str(tree.col) + ':' + str(tree.value) + '?' + " Level="
          + str(tree.level) + '\n')
    # The branch marker and the subtree's first line share one output line.
    write(indent + 'T->' + ' ')
    printtree(tree.tb, indent + ' ')
    write(indent + 'F->' + ' ')
    printtree(tree.fb, indent + ' ')
def _read_field():
    """Read one line from standard input and return it as a row field.

    The line is parsed as a Python literal without evaluating code, so both
    quoted (``'slashdot'``) and bare (``slashdot``) text is accepted and a
    number such as ``19`` becomes an int.  Anything that is not a plain
    string or number literal is returned as the stripped text itself.
    """
    import ast
    import sys

    text = sys.stdin.readline().strip()
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        return text
    # Only strings and numbers are meaningful field values; e.g. a bare
    # None must stay the label text 'None', not become the None object.
    if isinstance(parsed, (str, int, float)):
        return parsed
    return text


if __name__ == "__main__":
    # Expected input, one field per line, for example:
    #   slashdot / US / no / 19 / None
    # The fields are read safely rather than with a bare input(): on
    # Python 2 input() evaluates the text as code, and on Python 3 it
    # returns a string, which would leave pagesVisited as text among the
    # integer page counts and break the numeric >= comparison.
    referrer = _read_field()
    location = _read_field()
    readFAQ = _read_field()
    pagesVisited = _read_field()
    serviceChosen = _read_field()

    # The entered record joins the training set before the tree is built.
    testCase = [referrer, location, readFAQ, pagesVisited, serviceChosen]
    trainingData.append(testCase)
    t = buildtree(-1, trainingData)
    printtree(t)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement