Advertisement
Samardziev

Дрва-лв

May 25th, 2018
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.54 KB | None | 0 0
  1. my_data=[['slashdot','USA','yes',18,'None'],
  2.         ['google','France','yes',23,'Premium'],
  3.         ['google','France','yes',23,'Basic'],
  4.         ['google','France','yes',23,'Basic'],
  5.         ['digg','USA','yes',24,'Basic'],
  6.         ['kiwitobes','France','yes',23,'Basic'],
  7.         ['google','UK','no',21,'Premium'],
  8.         ['(direct)','New Zealand','no',12,'None'],
  9.         ['(direct)','UK','no',21,'Basic'],
  10.         ['google','USA','no',24,'Premium'],
  11.         ['slashdot','France','yes',19,'None'],
  12.         ['digg','USA','no',18,'None'],
  13.         ['google','UK','no',18,'None'],
  14.         ['kiwitobes','UK','no',19,'None'],
  15.         ['digg','New Zealand','yes',12,'Basic'],
  16.         ['slashdot','UK','no',21,'None'],
  17.         ['google','UK','yes',18,'Basic'],
  18.         ['kiwitobes','France','yes',19,'Basic']]
  19.  
  20. class decisionnode:
  21.     def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
  22.         self.col = col
  23.         self.value = value
  24.         self.results = results
  25.         self.tb = tb
  26.         self.fb = fb
  27.  
  28.  
  29. def sporedi_broj(row, column, value):
  30.     return row[column] >= value
  31.  
  32.  
  33. def sporedi_string(row, column, value):
  34.     return row[column] == value
  35.  
  36.  
  37. # Divides a set on a specific column. Can handle numeric
  38. # or nominal values
  39. def divideset(rows, column, value):
  40.     # Make a function that tells us if a row is in
  41.     # the first group (true) or the second group (false)
  42.     split_function = None
  43.     if isinstance(value, int) or isinstance(value, float):  # ako vrednosta so koja sporeduvame e od tip int ili float
  44.         # split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  45.         split_function = sporedi_broj
  46.     else:
  47.         # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  48.         split_function = sporedi_string
  49.  
  50.     # Divide the rows into two sets and return them
  51.     set_false = []
  52.     set_true = []
  53.     for row in rows:
  54.         if split_function(row, column, value):
  55.             set_true.append(row)
  56.         else:
  57.             set_false.append(row)
  58.     set1 = [row for row in rows if
  59.             split_function(row, column, value)]  # za sekoj row od rows za koj split_function vrakja true
  60.     set2 = [row for row in rows if
  61.             not split_function(row, column, value)]  # za sekoj row od rows za koj split_function vrakja false
  62.     return (set1, set2)
  63.     #return (set_true, set_false)
  64.  
  65.  
  66. st, sf = divideset(my_data, 3, 20)
  67. #print(sf)
  68. #print(st)
  69.  
  70.  
  71. # Create counts of possible results (the last column of
  72. # each row is the result)
  73. def uniquecounts(rows):
  74.     results = {}
  75.     for row in rows:
  76.         # The result is the last column
  77.         r = row[-1]
  78.         results.setdefault(r, 0)
  79.         results[r] += 1
  80.  
  81.     return results
  82.  
  83.  
  84. #print(uniquecounts(my_data))
  85. #print(uniquecounts(st))
  86. #print(uniquecounts(sf))
  87.  
  88.  
  89. # Probability that a randomly placed item will
  90. # be in the wrong category
  91.  
  92. def log2(x):
  93.     from math import log
  94.     l2 = log(x) / log(2)
  95.     return l2
  96.  
  97.  
  98. # Entropy is the sum of p(x)log(p(x)) across all
  99. # the different possible results
  100. def entropy(rows):
  101.     results = uniquecounts(rows)
  102.     # Now calculate the entropy
  103.     ent = 0.0
  104.     for r in results.keys():
  105.         p = float(results[r]) / len(rows)
  106.         ent = ent - p * log2(p)
  107.     return ent
  108.  
  109.  
  110. #print(entropy(my_data), entropy(st), entropy(sf))
  111.  
  112.  
  113. # exit(0)
  114.  
  115.  
  116. def buildtree(rows, scoref=entropy):
  117.     if len(rows) == 0: return decisionnode()
  118.     current_score = scoref(rows)
  119.  
  120.     # Set up some variables to track the best criteria
  121.     best_gain = 0.0
  122.     best_column = -1
  123.     best_value = None
  124.     best_subsetf = None
  125.     best_subsett = None
  126.  
  127.     column_count = len(rows[0]) - 1
  128.     for col in range(column_count):
  129.         # Generate the list of different values in
  130.         # this column
  131.         column_values = set()
  132.         for row in rows:
  133.             column_values.add(row[col])
  134.         # Now try dividing the rows up for each value
  135.         # in this column
  136.         for value in column_values:
  137.             (set1, set2) = divideset(rows, col, value)
  138.  
  139.             # Information gain
  140.             p = float(len(set1)) / len(rows)
  141.             gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
  142.             if gain > best_gain and len(set1) > 0 and len(set2) > 0:
  143.                 best_gain = gain
  144.                 best_column = col
  145.                 best_value = value
  146.                 best_subsett = set1
  147.                 best_subsetf = set2
  148.                 # best_criteria = (col, value)
  149.                 # best_sets = (set1, set2)
  150.  
  151.     # Create the subbranches
  152.     if best_gain > 0:
  153.         trueBranch = buildtree(best_subsett, scoref)
  154.         falseBranch = buildtree(best_subsetf, scoref)
  155.         return decisionnode(col=best_column, value=best_value,
  156.                             tb=trueBranch, fb=falseBranch)
  157.     else:
  158.         return decisionnode(results=uniquecounts(rows))
  159.  
  160.  
  161. t = buildtree(my_data)
  162.  
  163.  
  164. def printtree(tree, indent='',lvl=0):
  165.     # Is this a leaf node?
  166.     if tree.results != None:
  167.         print str(tree.results)
  168.     else:
  169.         # Print the criteria
  170.         print str(tree.col) + ':' + str(tree.value) + '? '+ 'Level=' + str(lvl)
  171.         # Print the branches
  172.         print indent + 'T->', #VAZNO!!! zapirka da se stavi
  173.         printtree(tree.tb, indent + '  ',lvl+1)
  174.         print indent + 'F->', #ISTO I TUKA
  175.         printtree(tree.fb, indent + '  ',lvl+1)
  176.  
  177.  
  178.  
  179. #printtree(t)
  180.  
  181. def classify(observation, tree):
  182.     if tree.results != None:
  183.         return tree.results
  184.        
  185.         #ZADACA 2 - LV
  186.         #sortiraj = sorted(tree.results.iteritems())
  187.         #return sortiraj[0][0]
  188.     else:
  189.         vrednost = observation[tree.col]
  190.         branch = None
  191.  
  192.         if isinstance(vrednost, int) or isinstance(vrednost, float):
  193.             if vrednost >= tree.value:
  194.                 branch = tree.tb
  195.             else:
  196.                 branch = tree.fb
  197.         else:
  198.             if vrednost == tree.value:
  199.                 branch = tree.tb
  200.             else:
  201.                 branch = tree.fb
  202.  
  203.         return classify(observation, branch)
  204.  
  205. if __name__ == "__main__":
  206.     # referrer='slashdot'
  207.     # location='US'
  208.     # readFAQ='no'
  209.     # pagesVisited=19
  210.     # serviceChosen='None'
  211.  
  212.     referrer=input()
  213.     location=input()
  214.     readFAQ=input()
  215.     pagesVisited=input()
  216.     serviceChosen=input()
  217.  
  218.     testCase=[referrer, location, readFAQ, pagesVisited, serviceChosen]
  219.     my_data.append(testCase)
  220.     t=buildtree(my_data)
  221.     printtree(t)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement