daily pastebin goal
25%
SHARE
TWEET

kiki

a guest Jan 23rd, 2019 74 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3.  
  4. import re
  5. import math
  6.  
  7. trainingData=[['slashdot','USA','yes',18,'None'],
  8.         ['google','France','yes',23,'Premium'],
  9.         ['google','France','yes',23,'Basic'],
  10.         ['google','France','yes',23,'Basic'],
  11.         ['digg','USA','yes',24,'Basic'],
  12.         ['kiwitobes','France','yes',23,'Basic'],
  13.         ['google','UK','no',21,'Premium'],
  14.         ['(direct)','New Zealand','no',12,'None'],
  15.         ['(direct)','UK','no',21,'Basic'],
  16.         ['google','USA','no',24,'Premium'],
  17.         ['slashdot','France','yes',19,'None'],
  18.         ['digg','USA','no',18,'None'],
  19.         ['google','UK','no',18,'None'],
  20.         ['kiwitobes','UK','no',19,'None'],
  21.         ['digg','New Zealand','yes',12,'Basic'],
  22.         ['slashdot','UK','no',21,'None'],
  23.         ['google','UK','yes',18,'Basic'],
  24.         ['kiwitobes','France','yes',19,'Basic']]
  25.  
  26. class decisionnode(object):
  27.     def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
  28.         self.col = col
  29.         self.value = value
  30.         self.results = results
  31.         self.tb = tb
  32.         self.fb = fb
  33.  
  34. def sporedi_broj(value1, value2):
  35.     return value1 >= value2
  36.  
  37.  
  38. def sporedi_string(value1, value2):
  39.     return value1 == value2
  40.  
  41.    
  42.    
  43. def divideset(rows, column, value):
  44.     sporedi = get_compare_func(value)
  45. #     print(split_function)
  46.     # Divide the rows into two sets and return them
  47.     set_false = []
  48.     set_true = []
  49.     for row in rows:
  50.         uslov=sporedi(row[column], value)
  51. #         print(column, value, row[column], uslov, row)
  52.         if uslov:
  53.             set_true.append(row)
  54.         else:
  55.             set_false.append(row)
  56. #     print(len(set_true), len(set_false))
  57. #     set_true = [row for row in rows if
  58. #             split_function(row, column, value)]  # za sekoj row od rows za koj split_function vrakja true
  59. #     set_false = [row for row in rows if
  60. #             not split_function(row, column, value)]  # za sekoj row od rows za koj split_function vrakja false
  61.     return (set_true, set_false)
  62.    
  63.    
  64. def uniquecounts(rows):
  65.     d={}
  66.     for r in rows:
  67. #         print(r[-1])
  68.         d.setdefault(r[-1], 0)
  69.         d[r[-1]]+=1
  70.     return d
  71.  
  72. def entropy(rows):
  73.     results = uniquecounts(rows)
  74.     # Now calculate the entropy
  75.     ent = 0.0
  76.     n = len(rows)
  77.     for label, cnt in results.items():
  78. #         print(r)
  79.         p = float(cnt) / n
  80. #         print(label, cnt, p)
  81.         ent -= p * log2(p)
  82.     return ent
  83.  
  84. def info_gain(current_score, sets, scoref=entropy):
  85.     m = sum([len(s) for s in sets])
  86.     gain = current_score
  87.     for s in sets:
  88.         n=len(s)
  89.         p=1.*n/m
  90.         gain -= p*scoref(s)
  91.     return gain
  92.  
  93. def buildtree(rows, scoref=entropy):
  94.     if len(rows) == 0:
  95.         return decisionnode()
  96.     current_score = scoref(rows)
  97.  
  98.     # Set up some variables to track the best criteria
  99.     best_gain = 0.0
  100.     best_column = -1
  101.     best_value = None
  102.     best_subsetf = None
  103.     best_subsett = None
  104.    
  105.     column_count = len(rows[0]) - 1
  106.     for col in range(column_count):
  107.         # Generate the list of different values in
  108.         # this column
  109. #         column_values = set()
  110. #         for row in rows:
  111. #             column_values.add(row[col])
  112. #         print(column_values)
  113.         column_values = set([row[col] for row in rows])
  114. #         print('Zemame vo predvid podelba po:', col, len(column_values), column_values)
  115. #         continue
  116.         # Now try dividing the rows up for each value
  117.         # in this column
  118.         for value in column_values:
  119.             sets = divideset(rows, col, value)
  120.  
  121.             # Information gain
  122. #             p = float(len(set1)) / len(rows)
  123. #             gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
  124.             gain = info_gain(current_score, sets, scoref)
  125.             if gain > best_gain and len(sets)>0 and len(sets[0]) > 0 and len(sets[1]) > 0:
  126.                 best_gain = gain
  127.                 best_column = col
  128.                 best_value = value
  129.                 best_subsett = sets[0]
  130.                 best_subsetf = sets[1]
  131.                 # best_criteria = (col, value)
  132.                 # best_sets = (set1, set2)
  133. #             print('Dividing dataset', col, value, gain, sets)
  134.     # pronajden e korenot
  135. #     return
  136.     # Create the subbranches
  137.     if best_gain > 0:
  138. #         print(best_subsett)
  139. #         print(best_subsetf)
  140.         print(best_column, best_value, best_gain)
  141.         print('Starting true subbranch')
  142.         trueBranch = buildtree(best_subsett, scoref)
  143.         print()
  144.         print('Starting false subbranch')
  145.         falseBranch = buildtree(best_subsetf, scoref)
  146.         print()
  147.         return decisionnode(col=best_column, value=best_value,
  148.                             tb=trueBranch, fb=falseBranch)
  149.  
  150.     else:
  151.         print('Terminalen jazol')
  152.         print()
  153.         return decisionnode(results=uniquecounts(rows))
  154.  
  155. def printtree(tree, indent=''):
  156.     # Is this a leaf node?
  157.     if tree.results != None:
  158.         print(indent + str(sorted(tree.results.items())))
  159.     else:
  160.         # Print the criteria
  161.         print(indent + str(tree.col) + ':' + str(tree.value) + '? ')
  162.         # Print the branches
  163.         print(indent + 'T->')
  164.         printtree(tree.tb, indent + '  ')
  165.         print(indent + 'F->')
  166.         printtree(tree.fb, indent + '  ')
  167.  
  168.  
  169.  
  170.  
  171. def classify(observation, tree):
  172.     if tree.results != None:
  173.         sortirano=sorted(tree.results.items())
  174.         return sortirano[0][0]
  175.    
  176.    
  177.     else:
  178.         vrednost = observation[tree.col]
  179.         if compare_values(vrednost, tree.value):
  180.            branch = tree.tb
  181.         else:
  182.            branch = tree.fb
  183.         return classify(observation, branch)
  184.        
  185.    
  186.  
  187.  
  188. if __name__ == "__main__":
  189.     # referrer='slashdot'
  190.     # location='UK'
  191.     # readFAQ='no'
  192.     # pagesVisited=21
  193.     # serviceChosen='Unknown'
  194.  
  195.     referrer=input()
  196.     location=input()
  197.     readFAQ=input()
  198.     pagesVisited=input()
  199.     serviceChosen=input()
  200.    
  201.     testCase=[referrer,location,readFAQ,pagesVisited,serviceChosen]
  202.     buildtree(trainingData)
  203.     klasa=classify(testCase,tree)
  204.     print klasa
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top