• API
• FAQ
• Tools
• Archive
daily pastebin goal
25%
SHARE
TWEET

# kiki

a guest Jan 23rd, 2019 74 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
1. #!/usr/bin/python
2. # -*- coding: utf-8 -*-
3.
4. import re
5. import math
6.
# Sample rows for the decision-tree code in this file. Every row has five
# fields: the last one is the class label (uniquecounts reads r[-1]) and the
# first four are the candidate split columns.
# NOTE(review): the fields look like referrer, location, a yes/no flag,
# pages visited and the chosen service -- confirm against the data source.
trainingData = [
    ['slashdot', 'USA', 'yes', 18, 'None'],
    ['digg', 'USA', 'yes', 24, 'Basic'],
    ['kiwitobes', 'France', 'yes', 23, 'Basic'],
    ['(direct)', 'New Zealand', 'no', 12, 'None'],
    ['(direct)', 'UK', 'no', 21, 'Basic'],
    ['slashdot', 'France', 'yes', 19, 'None'],
    ['digg', 'USA', 'no', 18, 'None'],
    ['kiwitobes', 'UK', 'no', 19, 'None'],
    ['digg', 'New Zealand', 'yes', 12, 'Basic'],
    ['slashdot', 'UK', 'no', 21, 'None'],
    ['kiwitobes', 'France', 'yes', 19, 'Basic'],
]
25.
class decisionnode(object):
    """A single node of a binary decision tree.

    Attributes:
        col: index of the column tested at this node (-1 when unset).
        value: value the column is compared against.
        results: label counts stored on a leaf; None on an inner node.
        tb: subtree followed when the test holds.
        fb: subtree followed when the test does not hold.
    """

    def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
        # Split criterion: the examined column and its reference value.
        self.col, self.value = col, value
        # Filled in for leaves only; inner nodes keep None here.
        self.results = results
        # Children for the "true" and "false" outcome of the test.
        self.tb, self.fb = tb, fb
33.
def sporedi_broj(value1, value2):
    """Compare two numbers: report whether value1 is at least value2.

    Returns the result of ``value1 >= value2``.
    """
    at_least = value1 >= value2
    return at_least
36.
37.
def sporedi_string(value1, value2):
    """Compare two strings: report whether value1 equals value2.

    Returns the result of ``value1 == value2``.
    """
    same = value1 == value2
    return same
40.
41.
42.
def divideset(rows, column, value):
    """Split rows into two lists according to one column.

    The comparison function is obtained from ``get_compare_func(value)`` and
    is called once per row as ``compare(row[column], value)``.

    Args:
        rows: iterable of indexable rows.
        column: index of the field to test in every row.
        value: reference value handed to the comparison function.

    Returns:
        A tuple ``(set_true, set_false)``: the rows for which the comparison
        was truthy and the remaining rows, each in their original order.
    """
    compare = get_compare_func(value)
    matching, remaining = [], []
    for row in rows:
        # Route the row to whichever list corresponds to the test outcome.
        bucket = matching if compare(row[column], value) else remaining
        bucket.append(row)
    return (matching, remaining)
62.
63.
def uniquecounts(rows):
    """Count how many rows carry each label.

    The label of a row is its last field (``row[-1]``).

    Returns:
        A dict mapping every label seen to its number of occurrences; an
        empty dict when rows is empty.
    """
    counts = {}
    for row in rows:
        label = row[-1]
        counts[label] = counts.get(label, 0) + 1
    return counts
71.
def entropy(rows):
    """Return the entropy, in bits, of the label distribution in rows.

    Labels are tallied with ``uniquecounts`` (a row's label is its last
    field). For every label with relative frequency p the term
    ``p * log2(p)`` is subtracted from the running total.

    Args:
        rows: sized collection of rows.

    Returns:
        The entropy as a float; 0.0 when rows is empty, because there are
        no labels to iterate over.
    """
    total = len(rows)
    ent = 0.0
    for cnt in uniquecounts(rows).values():
        p = float(cnt) / total
        # Qualified call: only the math module itself is imported at file
        # level, so a bare ``log2`` name would not resolve.
        ent -= p * math.log2(p)
    return ent
83.
def info_gain(current_score, sets, scoref=entropy):
    """Compute the gain obtained by splitting into the given subsets.

    Starting from current_score, the score of every subset is subtracted,
    weighted by the share of all rows that fall into that subset.

    Args:
        current_score: score of the undivided rows.
        sets: collection of row subsets produced by the split.
        scoref: scoring function applied to each subset.

    Returns:
        current_score minus the size-weighted subset scores.
    """
    total = sum(len(subset) for subset in sets)
    remaining = current_score
    for subset in sets:
        weight = float(len(subset)) / total
        remaining -= weight * scoref(subset)
    return remaining
92.
def buildtree(rows, scoref=entropy):
    """Recursively grow a binary decision tree from rows.

    Every distinct value of every column except the last is tried as a split
    point via ``divideset``; the split with the strictly largest positive
    ``info_gain`` whose two sides are both non-empty is kept. Progress
    messages are printed while the tree is built.

    Args:
        rows: list of rows; the last field of a row is treated as its label.
        scoref: scoring function used for the rows and for the subsets.

    Returns:
        A decisionnode. For empty rows a default-constructed node; when no
        split yields a positive gain, a leaf holding ``uniquecounts(rows)``;
        otherwise an inner node with the chosen column, value and subtrees.
    """
    if len(rows) == 0:
        return decisionnode()

    baseline = scoref(rows)

    # Best split seen so far; a candidate has to beat this gain strictly.
    top_gain = 0.0
    top_col, top_value = -1, None
    top_true = top_false = None

    feature_count = len(rows[0]) - 1  # the final field is not a split column
    for col in range(feature_count):
        # Every distinct value occurring in this column is a candidate.
        for value in set(row[col] for row in rows):
            sets = divideset(rows, col, value)
            gain = info_gain(baseline, sets, scoref)
            if not gain > top_gain:
                continue
            # A usable split must leave rows on both sides.
            if len(sets) == 0 or len(sets[0]) == 0 or len(sets[1]) == 0:
                continue
            top_gain = gain
            top_col, top_value = col, value
            top_true, top_false = sets[0], sets[1]

    if not top_gain > 0:
        # Nothing improves on the current rows: emit a leaf.
        print('Terminalen jazol')
        print()
        return decisionnode(results=uniquecounts(rows))

    print(top_col, top_value, top_gain)
    print('Starting true subbranch')
    true_branch = buildtree(top_true, scoref)
    print()
    print('Starting false subbranch')
    false_branch = buildtree(top_false, scoref)
    print()
    return decisionnode(col=top_col, value=top_value,
                        tb=true_branch, fb=false_branch)
154.
def printtree(tree, indent=''):
    """Print a textual rendering of the tree rooted at ``tree``.

    A leaf is printed as the sorted list of its ``results`` items. An inner
    node is printed as ``<col>:<value>? `` followed by its true branch under
    ``T->`` and its false branch under ``F->``, each indented by two more
    spaces.

    Args:
        tree: node exposing ``results``, ``col``, ``value``, ``tb``, ``fb``.
        indent: prefix written in front of every line of this level.
    """
    if tree.results is not None:
        # Leaf: show the label counts and stop descending.
        print(indent + str(sorted(tree.results.items())))
        return

    # Inner node: the test first, then both subtrees one level deeper.
    print(''.join([indent, str(tree.col), ':', str(tree.value), '? ']))
    deeper = indent + '  '
    print(indent + 'T->')
    printtree(tree.tb, deeper)
    print(indent + 'F->')
    printtree(tree.fb, deeper)
167.
168.
169.
170.
def classify(observation, tree):
    """Walk the tree for one observation and return the label it reaches.

    At every inner node the observation's field ``observation[node.col]`` is
    tested with ``compare_values`` against the node's value; the true branch
    is followed when the test is truthy, the false branch otherwise.

    Args:
        observation: indexable record to classify.
        tree: root node of the decision tree.

    Returns:
        The label of the first entry of the leaf's ``results`` items in
        sorted order (not necessarily the most frequent label).
    """
    node = tree
    while node.results is None:
        observed = observation[node.col]
        if compare_values(observed, node.value):
            node = node.tb
        else:
            node = node.fb
    ranked = sorted(node.results.items())
    return ranked[0][0]
184.
185.
186.
187.
188. if __name__ == "__main__":
189.     # referrer='slashdot'
190.     # location='UK'
192.     # pagesVisited=21
193.     # serviceChosen='Unknown'
194.
195.     referrer=input()
196.     location=input()