Advertisement
Guest User

zadaca 1

a guest
Oct 20th, 2017
82
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.45 KB | None | 0 0
  # Training rows for the decision tree. Columns, in order:
  # referrer, location, readFAQ, pagesVisited, serviceChosen.
  # The last column is the label the tree learns to predict.
  trainingData = [
      ['slashdot', 'USA', 'yes', 18, 'None'],
      ['google', 'France', 'yes', 23, 'Premium'],
      ['google', 'France', 'yes', 23, 'Basic'],
      ['google', 'France', 'yes', 23, 'Basic'],
      ['digg', 'USA', 'yes', 24, 'Basic'],
      ['kiwitobes', 'France', 'yes', 23, 'Basic'],
      ['google', 'UK', 'no', 21, 'Premium'],
      ['(direct)', 'New Zealand', 'no', 12, 'None'],
      ['(direct)', 'UK', 'no', 21, 'Basic'],
      ['google', 'USA', 'no', 24, 'Premium'],
      ['slashdot', 'France', 'yes', 19, 'None'],
      ['digg', 'USA', 'no', 18, 'None'],
      ['google', 'UK', 'no', 18, 'None'],
      ['kiwitobes', 'UK', 'no', 19, 'None'],
      ['digg', 'New Zealand', 'yes', 12, 'Basic'],
      ['slashdot', 'UK', 'no', 21, 'None'],
      ['google', 'UK', 'yes', 18, 'Basic'],
      ['kiwitobes', 'France', 'yes', 19, 'Basic'],
  ]
  19.  
  20.  
  class decisionnode:
      """One node of a binary decision tree.

      Attributes:
          col: index of the column tested at this node (-1 by default).
          value: value the column is compared against.
          results: label counts for a leaf node; None on decision nodes.
          tb: child node followed when the test is true.
          fb: child node followed when the test is false.
      """

      def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
          # Split criterion.
          self.col, self.value = col, value
          # Leaf payload.
          self.results = results
          # True / false subtrees.
          self.tb, self.fb = tb, fb
  28.  
  29.  
  def sporedi_broj(row, column, value):
      """Numeric split test: True when row[column] is at least value."""
      cell = row[column]
      return cell >= value
  32.  
  33.  
  def sporedi_string(row, column, value):
      """Nominal split test: True when row[column] equals value."""
      cell = row[column]
      return cell == value
  36.  
  37.  
  def divideset(rows, column, value):
      """Divide rows into two lists based on one column.

      Handles numeric and nominal values: when value is an int or float the
      test is row[column] >= value, otherwise it is row[column] == value.

      Args:
          rows: iterable of indexable rows.
          column: index of the column to test.
          value: value each row's column is compared against.

      Returns:
          A tuple (set_true, set_false): the rows that pass the test and the
          rows that fail it, each in their original order.
      """
      # Pick the predicate that says whether a row belongs to the first
      # (true) group or the second (false) group.
      if isinstance(value, (int, float)):
          split_function = sporedi_broj
      else:
          split_function = sporedi_string

      # Single pass over the rows; the predicate runs once per row.
      set_true = []
      set_false = []
      for row in rows:
          if split_function(row, column, value):
              set_true.append(row)
          else:
              set_false.append(row)
      return (set_true, set_false)
  65.  
  66.  
  def uniquecounts(rows):
      """Count how often each result occurs.

      The result of a row is its last column. Returns a dict mapping each
      distinct result to its number of occurrences, keyed in first-seen order.
      """
      counts = {}
      for row in rows:
          label = row[-1]
          counts[label] = counts.get(label, 0) + 1
      return counts
  78.  
  79.  
  # Base-2 logarithm helper.
  def log2(x):
      """Return the base-2 logarithm of x.

      Delegates to math.log2, which is more accurate than dividing natural
      logarithms (log(x) / log(2) can be off in the last bit, even for exact
      powers of two).

      Raises:
          ValueError: if x is zero or negative.
      """
      # Function-scope import, as in the original helper.
      import math

      return math.log2(x)
  87.  
  88.  
  def entropy(rows):
      """Return the entropy of the results in rows.

      Entropy is the negated sum of p(x) * log2(p(x)) over every distinct
      result x, where p(x) is the share of rows having that result. An empty
      rows yields 0.0.
      """
      counts = uniquecounts(rows)
      total = len(rows)
      ent = 0.0
      for count in counts.values():
          p = float(count) / total
          ent = ent - p * log2(p)
      return ent
  99.  
  100.  
  101. # exit(0)
  102.  
  103.  
  def buildtree(rows, scoref=entropy):
      """Recursively build a decision tree from rows.

      Every column except the last is a candidate attribute. For each distinct
      value in each column the rows are split with divideset, and the split
      with the highest information gain (measured with scoref) is kept.

      Args:
          rows: list of rows; the last column of each row is the result.
          scoref: function scoring the impurity of a list of rows.

      Returns:
          A decisionnode: a decision node when some split has positive gain,
          otherwise a leaf holding uniquecounts(rows). An empty rows yields a
          default decisionnode().
      """
      if len(rows) == 0:
          return decisionnode()

      base_score = scoref(rows)
      total = len(rows)

      # Best split found so far.
      top_gain = 0.0
      top_column = -1
      top_value = None
      top_true = None
      top_false = None

      # The last column is the result, so it is never a split candidate.
      for col in range(len(rows[0]) - 1):
          # Distinct values of this column, in first-seen order.
          candidates = {row[col]: 1 for row in rows}

          for value in candidates:
              matched, unmatched = divideset(rows, col, value)

              # Information gain of this split.
              p = float(len(matched)) / total
              gain = (base_score - p * scoref(matched)
                      - (1 - p) * scoref(unmatched))

              # Only strictly better, non-degenerate splits replace the best.
              if gain > top_gain and matched and unmatched:
                  top_gain = gain
                  top_column = col
                  top_value = value
                  top_true = matched
                  top_false = unmatched

      if top_gain > 0:
          # Recurse into both halves, true branch first.
          true_branch = buildtree(top_true, scoref)
          false_branch = buildtree(top_false, scoref)
          return decisionnode(col=top_column, value=top_value,
                              tb=true_branch, fb=false_branch)

      # No useful split: make a leaf holding the result counts.
      return decisionnode(results=uniquecounts(rows))
  147.  
  148.  
  def printtree(tree, indent='', level=0):
      """Print a text rendering of the tree to standard output.

      Leaves print their result counts as a sorted list of (result, count)
      pairs. Decision nodes print "<col>:<value>? Level=<depth>" followed by
      the true ("T->") and false ("F->") subtrees, indented two more spaces.

      Args:
          tree: node exposing results, col, value, tb and fb attributes.
          indent: prefix printed before every line of this subtree.
          level: depth of this node; the root is level 0.
      """
      # Is this a leaf node?
      if tree.results is not None:
          print(indent + str(sorted(tree.results.items())))
          return

      # Print the criteria.
      print(indent + str(tree.col) + ':' + str(tree.value) + '? Level=' + str(level))

      # Print the branches; children sit one level deeper, so the depth has to
      # be passed down explicitly.
      print(indent + 'T->')
      printtree(tree.tb, indent + '  ', level + 1)
      print(indent + 'F->')
      printtree(tree.fb, indent + '  ', level + 1)
  162.  
  163.  
  164. # exit(0)
  165.  
  def classify(observation, tree):
      """Walk the tree for one observation and return the leaf's results.

      At each decision node the observation's value in column tree.col is
      compared with tree.value: ints and floats take the true branch when
      value >= tree.value, any other type when value == tree.value.

      Args:
          observation: indexable row of attribute values.
          tree: root node exposing results, col, value, tb and fb.

      Returns:
          The results stored on the leaf that is reached.
      """
      node = tree
      # Descend until a leaf (a node carrying results) is reached.
      while node.results is None:
          observed = observation[node.col]
          if isinstance(observed, (int, float)):
              goes_true = observed >= node.value
          else:
              goes_true = observed == node.value
          node = node.tb if goes_true else node.fb
      return node.results
  185.  
  186.  
  if __name__ == "__main__":
      # Read one observation from standard input, one field per line, e.g.:
      #   slashdot / US / no / 19 / None
      referrer = input()
      location = input()
      readFAQ = input()
      # input() returns text, but the training rows store the page count as
      # an int. Leaving a str in this column makes the numeric ">=" split
      # raise TypeError, so convert it up front.
      pagesVisited = int(input())
      serviceChosen = input()

      # The new row is added to the training data before the tree is built.
      testCase = [referrer, location, readFAQ, pagesVisited, serviceChosen]
      trainingData.append(testCase)
      t = buildtree(trainingData)
      printtree(t)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement