drva_zad1
Kemudraj, Aug 22nd, 2017

Problem 1 (8 / 8)

Modify the decision tree class so that it also stores the level of the tree at which a node is located. Then modify the tree-building and tree-printing functions so that the level is printed for every node. The root is at level 0. The printTree function should print the tree built from the given training set. Read one instance from standard input, add it to the training set, then retrain the tree and print it.

trainingData=[['slashdot','USA','yes',18,'None'],
        ['google','France','yes',23,'Premium'],
        ['google','France','yes',23,'Basic'],
        ['google','France','yes',23,'Basic'],
        ['digg','USA','yes',24,'Basic'],
        ['kiwitobes','France','yes',23,'Basic'],
        ['google','UK','no',21,'Premium'],
        ['(direct)','New Zealand','no',12,'None'],
        ['(direct)','UK','no',21,'Basic'],
        ['google','USA','no',24,'Premium'],
        ['slashdot','France','yes',19,'None'],
        ['digg','USA','no',18,'None'],
        ['google','UK','no',18,'None'],
        ['kiwitobes','UK','no',19,'None'],
        ['digg','New Zealand','yes',12,'Basic'],
        ['slashdot','UK','no',21,'None'],
        ['google','UK','yes',18,'Basic'],
        ['kiwitobes','France','yes',19,'Basic']]

# my_data=[line.split('\t') for line in file('decision_tree_example.txt')]

class decisionnode:
    def __init__(self,col=-1,value=None,results=None,tb=None,fb=None):
        self.col=col          # index of the column tested at this node
        self.value=value      # value the column is compared against
        self.results=results  # class counts (set only for leaf nodes)
        self.tb=tb            # branch followed when the test is true
        self.fb=fb            # branch followed when the test is false
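
# Note (illustrative, not from the original paste): buildtree below creates two
# kinds of nodes from this class: internal nodes with col/value/tb/fb set and
# results=None, and leaves that carry only the class counts, e.g.
#   inner = decisionnode(col=2, value='yes', tb=yes_branch, fb=no_branch)
#   leaf = decisionnode(results={'Basic': 3})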

def sporedi_broj(row,column,value):
    return row[column]>=value

def sporedi_string(row,column,value):
    return row[column]==value

# Divides a set on a specific column. Can handle numeric
# or nominal values
def divideset(rows,column,value):
    # Make a function that tells us if a row is in
    # the first group (true) or the second group (false)
    split_function=None
    if isinstance(value,int) or isinstance(value,float): # if the value we compare against is an int or a float
        # split_function=lambda row:row[column]>=value # use a function that takes a row and returns True or False
        split_function=sporedi_broj
    else:
        # split_function=lambda row:row[column]==value # the value we compare against is of another type (a string)
        split_function=sporedi_string

    # Divide the rows into two sets and return them
    # set1=[row for row in rows if split_function(row)]  # every row in rows for which split_function returns True
    # set2=[row for row in rows if not split_function(row)] # every row in rows for which split_function returns False
    set1=[row for row in rows if split_function(row,column,value)]  # every row in rows for which split_function returns True
    set2=[row for row in rows if not split_function(row,column,value)] # every row in rows for which split_function returns False
    return (set1,set2)
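
# Example usage (illustrative, not part of the original paste): splitting the
# training set on the readFAQ column (index 2) with the value 'yes' puts the
# rows where readFAQ=='yes' into set1 and the rest into set2:
#   yes_rows, no_rows = divideset(trainingData, 2, 'yes')
# A numeric value such as 20 for column 3 would use the >= comparison instead.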

# Create counts of possible results (the last column of
# each row is the result)
def uniquecounts(rows):
    results={}
    for row in rows:
        # The result is the last column
        r=row[len(row)-1]
        if r not in results: results[r]=0
        results[r]+=1
    return results
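
# Example (illustrative, not part of the original paste): on the original 18
# training rows uniquecounts(trainingData) gives {'None': 7, 'Premium': 3, 'Basic': 8}.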

# Probability that a randomly placed item will
# be in the wrong category
def giniimpurity(rows):
    total=len(rows)
    counts=uniquecounts(rows)
    imp=0
    for k1 in counts:
        p1=float(counts[k1])/total
        for k2 in counts:
            if k1==k2: continue
            p2=float(counts[k2])/total
            imp+=p1*p2
    return imp

# Entropy is the sum of -p(x)log2(p(x)) across all
# the different possible results
def entropy(rows):
    from math import log
    log2=lambda x:log(x)/log(2)
    results=uniquecounts(rows)
    # Now calculate the entropy
    ent=0.0
    for r in results.keys():
        p=float(results[r])/len(rows)
        ent=ent-p*log2(p)
    return ent
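
# Example (illustrative, not part of the original paste): for the original 18
# rows the class proportions are 7/18 'None', 3/18 'Premium' and 8/18 'Basic',
# so entropy(trainingData) is roughly 1.48 bits and giniimpurity(trainingData)
# is roughly 0.62. Lower values mean a purer set.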

def buildtree(rows,scoref=entropy):
    if len(rows)==0: return decisionnode()
    current_score=scoref(rows)

    # Set up some variables to track the best criteria
    best_gain=0.0
    best_criteria=None
    best_sets=None

    column_count=len(rows[0])-1
    for col in range(0,column_count):
        # Generate the list of different values in
        # this column
        column_values={}
        for row in rows:
            column_values[row[col]]=1

        # Now try dividing the rows up for each value
        # in this column
        for value in column_values.keys():
            (set1,set2)=divideset(rows,col,value)

            # Information gain
            p=float(len(set1))/len(rows)
            gain=current_score-p*scoref(set1)-(1-p)*scoref(set2)
            if gain>best_gain and len(set1)>0 and len(set2)>0:
                best_gain=gain
                best_criteria=(col,value)
                best_sets=(set1,set2)

    # Create the subbranches
    if best_gain>0:
        # pass scoref on so the same impurity measure is used in the subtrees
        trueBranch=buildtree(best_sets[0],scoref)
        falseBranch=buildtree(best_sets[1],scoref)
        return decisionnode(col=best_criteria[0],value=best_criteria[1],
                            tb=trueBranch, fb=falseBranch)
    else:
        return decisionnode(results=uniquecounts(rows))
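
# Sketch (not part of the original assignment): a minimal helper for using the
# built tree to classify a new observation, following the same tb/fb and
# numeric/nominal split convention as divideset. The name classify is our own.
def classify(observation,tree):
    if tree.results is not None:
        return tree.results  # leaf: return the class counts
    if isinstance(tree.value,int) or isinstance(tree.value,float):
        branch=tree.tb if observation[tree.col]>=tree.value else tree.fb
    else:
        branch=tree.tb if observation[tree.col]==tree.value else tree.fb
    return classify(observation,branch)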

def printtree(tree,indent='',level=0):
    # Is this a leaf node?
    if tree.results is not None:
        print str(tree.results)
    else:
        # Print the criteria together with the node's level (root is level 0)
        print str(tree.col)+':'+str(tree.value)+'? ' + 'Level=%d' % (level)
        # Print the branches
        print indent+'T->',
        printtree(tree.tb,indent+'  ',level+1)
        print indent+'F->',
        printtree(tree.fb,indent+'  ',level+1)
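
# Alternative sketch (assumption: one way to satisfy the statement literally,
# which asks the node class itself to store the level): give decisionnode a
# level attribute, set it while building, and have printtree read it instead of
# threading a level parameter through the recursion.
#
#   class decisionnode:
#       def __init__(self,col=-1,value=None,results=None,tb=None,fb=None,level=0):
#           ...
#           self.level=level
#
#   def buildtree(rows,scoref=entropy,level=0):
#       ...
#       trueBranch=buildtree(best_sets[0],scoref,level+1)
#       falseBranch=buildtree(best_sets[1],scoref,level+1)
#       return decisionnode(col=best_criteria[0],value=best_criteria[1],
#                           tb=trueBranch,fb=falseBranch,level=level)
#
#   # in printtree, print 'Level=%d' % tree.level instead of the parameter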

if __name__ == "__main__":
    # referrer='slashdot'
    # location='US'
    # readFAQ='no'
    # pagesVisited=19
    # serviceChosen='None'

    # Python 2 input() evaluates what is typed, so the strings must be quoted
    # (see the sample input below) and the page count is read as an int.
    referrer=input()
    location=input()
    readFAQ=input()
    pagesVisited=input()
    serviceChosen=input()

    # Add the new instance to the training set, rebuild the tree and print it
    testCase=[referrer, location, readFAQ, pagesVisited, serviceChosen]
    trainingData.append(testCase)
    t=buildtree(trainingData)
    printtree(t)

Sample input
'(direct)'
'France'
'no'
20
'Basic'

Sample output
3:20? Level=0
T-> 0:slashdot? Level=1
  T-> {'None': 1}
  F-> 0:google? Level=2
    T-> 1:France? Level=3
      T-> {'Premium': 1, 'Basic': 2}
      F-> {'Premium': 2}
    F-> {'Basic': 4}
F-> 2:yes? Level=1
  T-> 0:slashdot? Level=2
    T-> {'None': 2}
    F-> {'Basic': 3}
  F-> {'None': 4}