cimona

LabDrva

Sep 14th, 2018
302
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.66 KB | None | 0 0
  1. my_data = [['slashdot', 'USA', 'yes', 18, 'None'],
  2. ['google', 'France', 'yes', 23, 'Premium'],
  3. ['digg', 'USA', 'yes', 24, 'Basic'],
  4. ['kiwitobes', 'France', 'yes', 23, 'Basic'],
  5. ['google', 'UK', 'no', 21, 'Premium'],
  6. ['(direct)', 'New Zealand', 'no', 12, 'None'],
  7. ['(direct)', 'UK', 'no', 21, 'Basic'],
  8. ['google', 'USA', 'no', 24, 'Premium'],
  9. ['slashdot', 'France', 'yes', 19, 'None'],
  10. ['digg', 'USA', 'no', 18, 'None'],
  11. ['google', 'UK', 'no', 18, 'None'],
  12. ['kiwitobes', 'UK', 'no', 19, 'None'],
  13. ['digg', 'New Zealand', 'yes', 12, 'Basic'],
  14. ['slashdot', 'UK', 'no', 21, 'None'],
  15. ['google', 'UK', 'yes', 18, 'Basic'],
  16. ['kiwitobes', 'France', 'yes', 19, 'Basic']]
  17.  
  18. test_cases = [['google', 'MK', 'no', 24, 'Unknown'],
  19. ['google', 'MK', 'no', 15, 'Unknown'],
  20. ['digg', 'UK', 'yes', 21, 'Unknown'],
  21. ['digg', 'UK', 'no', 25, 'Unknown']]
  22.  
  23.  
  24. # my_data=[line.split('\t') for line in file('decision_tree_example.txt')]
  25.  
  26. class decisionnode:
  27. def __init__(self, col=-1, value=None, results=None, tb=None, fb=None):
  28. self.col = col
  29. self.value = value
  30. self.results = results
  31. self.tb = tb
  32. self.fb = fb
  33.  
  34.  
  35. def sporedi_broj(row, column, value):
  36. return row[column] >= value
  37.  
  38.  
  39. def sporedi_string(row, column, value):
  40. return row[column] == value
  41.  
  42.  
  43. # Divides a set on a specific column. Can handle numeric
  44. # or nominal values
  45. def divideset(rows, column, value):
  46. # Make a function that tells us if a row is in
  47. # the first group (true) or the second group (false)
  48. split_function = None
  49. if isinstance(value, int) or isinstance(value, float): # ako vrednosta so koja sporeduvame e od tip int ili float
  50. # split_function=lambda row:row[column]>=value # togas vrati funkcija cij argument e row i vrakja vrednost true ili false
  51. split_function = sporedi_broj
  52. else:
  53. # split_function=lambda row:row[column]==value # ako vrednosta so koja sporeduvame e od drug tip (string)
  54. split_function = sporedi_string
  55.  
  56. # Divide the rows into two sets and return them
  57. # set1=[row for row in rows if split_function(row)] # za sekoj row od rows za koj split_function vrakja true
  58. # set2=[row for row in rows if not split_function(row)] # za sekoj row od rows za koj split_function vrakja false
  59. set1 = [row for row in rows if
  60. split_function(row, column, value)] # za sekoj row od rows za koj split_function vrakja true
  61. set2 = [row for row in rows if
  62. not split_function(row, column, value)] # za sekoj row od rows za koj split_function vrakja false
  63. return (set1, set2)
  64.  
  65.  
  66. # Create counts of possible results (the last column of
  67. # each row is the result)
  68. def uniquecounts(rows):
  69. results = {}
  70. for row in rows:
  71. # The result is the last column
  72. r = row[len(row) - 1]
  73. if r not in results: results[r] = 0
  74. results[r] += 1
  75. return results
  76.  
  77.  
  78. # Probability that a randomly placed item will
  79. # be in the wrong category
  80. def giniimpurity(rows):
  81. total = len(rows)
  82. counts = uniquecounts(rows)
  83. imp = 0
  84. for k1 in counts:
  85. p1 = float(counts[k1]) / total
  86. for k2 in counts:
  87. if k1 == k2: continue
  88. p2 = float(counts[k2]) / total
  89. imp += p1 * p2
  90. return imp
  91.  
  92.  
  93. # Entropy is the sum of p(x)log(p(x)) across all
  94. # the different possible results
  95. def entropy(rows):
  96. from math import log
  97. log2 = lambda x: log(x) / log(2)
  98. results = uniquecounts(rows)
  99. # Now calculate the entropy
  100. ent = 0.0
  101. for r in results.keys():
  102. p = float(results[r]) / len(rows)
  103. ent = ent - p * log2(p)
  104. return ent
  105.  
  106.  
  107. def buildtree(rows, scoref=entropy):
  108. if len(rows) == 0: return decisionnode()
  109. current_score = scoref(rows)
  110.  
  111. # Set up some variables to track the best criteria
  112. best_gain = 0.0
  113. best_criteria = None
  114. best_sets = None
  115.  
  116. column_count = len(rows[0]) - 1
  117. for col in range(0, column_count):
  118. # Generate the list of different values in
  119. # this column
  120. column_values = {}
  121. for row in rows:
  122. column_values[row[col]] = 1
  123. # print
  124. # Now try dividing the rows up for each value
  125. # in this column
  126. for value in column_values.keys():
  127. (set1, set2) = divideset(rows, col, value)
  128.  
  129. # Information gain
  130. p = float(len(set1)) / len(rows)
  131. gain = current_score - p * scoref(set1) - (1 - p) * scoref(set2)
  132. if gain > best_gain and len(set1) > 0 and len(set2) > 0:
  133. best_gain = gain
  134. best_criteria = (col, value)
  135. best_sets = (set1, set2)
  136.  
  137. # Create the subbranches
  138. if best_gain > 0:
  139. trueBranch = buildtree(best_sets[0])
  140. falseBranch = buildtree(best_sets[1])
  141. return decisionnode(col=best_criteria[0], value=best_criteria[1],
  142. tb=trueBranch, fb=falseBranch)
  143. else:
  144. return decisionnode(results=uniquecounts(rows))
  145.  
  146.  
  147. def printtree(tree, indent=''):
  148. # Is this a leaf node?
  149. if tree.results != None:
  150. print(str(tree.results))
  151. else:
  152. # Print the criteria
  153. print(str(tree.col) + ':' + str(tree.value) + '? ')
  154. # Print the branches
  155. print(indent + 'T->'),
  156. printtree(tree.tb, indent + ' ')
  157. print(indent + 'F->'),
  158. printtree(tree.fb, indent + ' ')
  159.  
  160.  
  161. def classify(observation, tree):
  162. if tree.results != None:
  163. return tree.results
  164. else:
  165. vrednost = observation[tree.col]
  166. branch = None
  167.  
  168. if isinstance(vrednost, int) or isinstance(vrednost, float):
  169. if vrednost >= tree.value:
  170. branch = tree.tb
  171. else:
  172. branch = tree.fb
  173. else:
  174. if vrednost == tree.value:
  175. branch = tree.tb
  176. else:
  177. branch = tree.fb
  178.  
  179. return classify(observation, branch)
  180.  
  181.  
  182. trainingData = [
  183. [6.3, 2.9, 5.6, 1.8, 'I. virginica'],
  184. [6.5, 3.0, 5.8, 2.2, 'I. virginica'],
  185. [7.6, 3.0, 6.6, 2.1, 'I. virginica'],
  186. [4.9, 2.5, 4.5, 1.7, 'I. virginica'],
  187. [7.3, 2.9, 6.3, 1.8, 'I. virginica'],
  188. [6.7, 2.5, 5.8, 1.8, 'I. virginica'],
  189. [7.2, 3.6, 6.1, 2.5, 'I. virginica'],
  190. [6.5, 3.2, 5.1, 2.0, 'I. virginica'],
  191. [6.4, 2.7, 5.3, 1.9, 'I. virginica'],
  192. [6.8, 3.0, 5.5, 2.1, 'I. virginica'],
  193. [5.7, 2.5, 5.0, 2.0, 'I. virginica'],
  194. [5.8, 2.8, 5.1, 2.4, 'I. virginica'],
  195. [6.4, 3.2, 5.3, 2.3, 'I. virginica'],
  196. [6.5, 3.0, 5.5, 1.8, 'I. virginica'],
  197. [7.7, 3.8, 6.7, 2.2, 'I. virginica'],
  198. [7.7, 2.6, 6.9, 2.3, 'I. virginica'],
  199. [6.0, 2.2, 5.0, 1.5, 'I. virginica'],
  200. [6.9, 3.2, 5.7, 2.3, 'I. virginica'],
  201. [5.6, 2.8, 4.9, 2.0, 'I. virginica'],
  202. [7.7, 2.8, 6.7, 2.0, 'I. virginica'],
  203. [6.3, 2.7, 4.9, 1.8, 'I. virginica'],
  204. [6.7, 3.3, 5.7, 2.1, 'I. virginica'],
  205. [7.2, 3.2, 6.0, 1.8, 'I. virginica'],
  206. [6.2, 2.8, 4.8, 1.8, 'I. virginica'],
  207. [6.1, 3.0, 4.9, 1.8, 'I. virginica'],
  208. [6.4, 2.8, 5.6, 2.1, 'I. virginica'],
  209. [7.2, 3.0, 5.8, 1.6, 'I. virginica'],
  210. [7.4, 2.8, 6.1, 1.9, 'I. virginica'],
  211. [7.9, 3.8, 6.4, 2.0, 'I. virginica'],
  212. [6.4, 2.8, 5.6, 2.2, 'I. virginica'],
  213. [6.3, 2.8, 5.1, 1.5, 'I. virginica'],
  214. [6.1, 2.6, 5.6, 1.4, 'I. virginica'],
  215. [7.7, 3.0, 6.1, 2.3, 'I. virginica'],
  216. [6.3, 3.4, 5.6, 2.4, 'I. virginica'],
  217. [5.1, 3.5, 1.4, 0.2, 'I. setosa'],
  218. [4.9, 3.0, 1.4, 0.2, 'I. setosa'],
  219. [4.7, 3.2, 1.3, 0.2, 'I. setosa'],
  220. [4.6, 3.1, 1.5, 0.2, 'I. setosa'],
  221. [5.0, 3.6, 1.4, 0.2, 'I. setosa'],
  222. [5.4, 3.9, 1.7, 0.4, 'I. setosa'],
  223. [4.6, 3.4, 1.4, 0.3, 'I. setosa'],
  224. [5.0, 3.4, 1.5, 0.2, 'I. setosa'],
  225. [4.4, 2.9, 1.4, 0.2, 'I. setosa'],
  226. [4.9, 3.1, 1.5, 0.1, 'I. setosa'],
  227. [5.4, 3.7, 1.5, 0.2, 'I. setosa'],
  228. [4.8, 3.4, 1.6, 0.2, 'I. setosa'],
  229. [4.8, 3.0, 1.4, 0.1, 'I. setosa'],
  230. [4.3, 3.0, 1.1, 0.1, 'I. setosa'],
  231. [5.8, 4.0, 1.2, 0.2, 'I. setosa'],
  232. [5.7, 4.4, 1.5, 0.4, 'I. setosa'],
  233. [5.4, 3.9, 1.3, 0.4, 'I. setosa'],
  234. [5.1, 3.5, 1.4, 0.3, 'I. setosa'],
  235. [5.7, 3.8, 1.7, 0.3, 'I. setosa'],
  236. [5.1, 3.8, 1.5, 0.3, 'I. setosa'],
  237. [5.4, 3.4, 1.7, 0.2, 'I. setosa'],
  238. [5.1, 3.7, 1.5, 0.4, 'I. setosa'],
  239. [4.6, 3.6, 1.0, 0.2, 'I. setosa'],
  240. [5.1, 3.3, 1.7, 0.5, 'I. setosa'],
  241. [4.8, 3.4, 1.9, 0.2, 'I. setosa'],
  242. [5.0, 3.0, 1.6, 0.2, 'I. setosa'],
  243. [5.0, 3.4, 1.6, 0.4, 'I. setosa'],
  244. [5.2, 3.5, 1.5, 0.2, 'I. setosa'],
  245. [5.2, 3.4, 1.4, 0.2, 'I. setosa'],
  246. [5.5, 2.3, 4.0, 1.3, 'I. versicolor'],
  247. [6.5, 2.8, 4.6, 1.5, 'I. versicolor'],
  248. [5.7, 2.8, 4.5, 1.3, 'I. versicolor'],
  249. [6.3, 3.3, 4.7, 1.6, 'I. versicolor'],
  250. [4.9, 2.4, 3.3, 1.0, 'I. versicolor'],
  251. [6.6, 2.9, 4.6, 1.3, 'I. versicolor'],
  252. [5.2, 2.7, 3.9, 1.4, 'I. versicolor'],
  253. [5.0, 2.0, 3.5, 1.0, 'I. versicolor'],
  254. [5.9, 3.0, 4.2, 1.5, 'I. versicolor'],
  255. [6.0, 2.2, 4.0, 1.0, 'I. versicolor'],
  256. [6.1, 2.9, 4.7, 1.4, 'I. versicolor'],
  257. [5.6, 2.9, 3.6, 1.3, 'I. versicolor'],
  258. [6.7, 3.1, 4.4, 1.4, 'I. versicolor'],
  259. [5.6, 3.0, 4.5, 1.5, 'I. versicolor'],
  260. [5.8, 2.7, 4.1, 1.0, 'I. versicolor'],
  261. [6.2, 2.2, 4.5, 1.5, 'I. versicolor'],
  262. [5.6, 2.5, 3.9, 1.1, 'I. versicolor'],
  263. [5.9, 3.2, 4.8, 1.8, 'I. versicolor'],
  264. [6.1, 2.8, 4.0, 1.3, 'I. versicolor'],
  265. [6.3, 2.5, 4.9, 1.5, 'I. versicolor'],
  266. [6.1, 2.8, 4.7, 1.2, 'I. versicolor'],
  267. [6.4, 2.9, 4.3, 1.3, 'I. versicolor'],
  268. [6.6, 3.0, 4.4, 1.4, 'I. versicolor'],
  269. [6.8, 2.8, 4.8, 1.4, 'I. versicolor'],
  270. [6.7, 3.0, 5.0, 1.7, 'I. versicolor'],
  271. [6.0, 2.9, 4.5, 1.5, 'I. versicolor'],
  272. [5.7, 2.6, 3.5, 1.0, 'I. versicolor'],
  273. [5.5, 2.4, 3.8, 1.1, 'I. versicolor'],
  274. [5.5, 2.4, 3.7, 1.0, 'I. versicolor'],
  275. [5.8, 2.7, 3.9, 1.2, 'I. versicolor'],
  276. [6.0, 2.7, 5.1, 1.6, 'I. versicolor'],
  277. [5.4, 3.0, 4.5, 1.5, 'I. versicolor'],
  278. [6.0, 3.4, 4.5, 1.6, 'I. versicolor'],
  279. [6.7, 3.1, 4.7, 1.5, 'I. versicolor'],
  280. [6.3, 2.3, 4.4, 1.3, 'I. versicolor'],
  281. [5.6, 3.0, 4.1, 1.3, 'I. versicolor'],
  282. [5.5, 2.5, 4.0, 1.3, 'I. versicolor'],
  283. [5.5, 2.6, 4.4, 1.2, 'I. versicolor'],
  284. [6.1, 3.0, 4.6, 1.4, 'I. versicolor'],
  285. [5.8, 2.6, 4.0, 1.2, 'I. versicolor'],
  286. [5.0, 2.3, 3.3, 1.0, 'I. versicolor'],
  287. [5.6, 2.7, 4.2, 1.3, 'I. versicolor'],
  288. [5.7, 3.0, 4.2, 1.2, 'I. versicolor'],
  289. [5.7, 2.9, 4.2, 1.3, 'I. versicolor'],
  290. [6.2, 2.9, 4.3, 1.3, 'I. versicolor'],
  291. [5.1, 2.5, 3.0, 1.1, 'I. versicolor'],
  292. [5.7, 2.8, 4.1, 1.3, 'I. versicolor'],
  293. [6.4, 3.1, 5.5, 1.8, 'I. virginica'],
  294. [6.0, 3.0, 4.8, 1.8, 'I. virginica'],
  295. [6.9, 3.1, 5.4, 2.1, 'I. virginica'],
  296. [6.7, 3.1, 5.6, 2.4, 'I. virginica'],
  297. [6.9, 3.1, 5.1, 2.3, 'I. virginica'],
  298. [5.8, 2.7, 5.1, 1.9, 'I. virginica'],
  299. [6.8, 3.2, 5.9, 2.3, 'I. virginica'],
  300. [6.7, 3.3, 5.7, 2.5, 'I. virginica'],
  301. [6.7, 3.0, 5.2, 2.3, 'I. virginica'],
  302. [6.3, 2.5, 5.0, 1.9, 'I. virginica'],
  303. [6.5, 3.0, 5.2, 2.0, 'I. virginica'],
  304. [6.2, 3.4, 5.4, 2.3, 'I. virginica'],
  305. [4.7, 3.2, 1.6, 0.2, 'I. setosa'],
  306. [4.8, 3.1, 1.6, 0.2, 'I. setosa'],
  307. [5.4, 3.4, 1.5, 0.4, 'I. setosa'],
  308. [5.2, 4.1, 1.5, 0.1, 'I. setosa'],
  309. [5.5, 4.2, 1.4, 0.2, 'I. setosa'],
  310. [4.9, 3.1, 1.5, 0.2, 'I. setosa'],
  311. [5.0, 3.2, 1.2, 0.2, 'I. setosa'],
  312. [5.5, 3.5, 1.3, 0.2, 'I. setosa'],
  313. [4.9, 3.6, 1.4, 0.1, 'I. setosa'],
  314. [4.4, 3.0, 1.3, 0.2, 'I. setosa'],
  315. [5.1, 3.4, 1.5, 0.2, 'I. setosa'],
  316. [5.0, 3.5, 1.3, 0.3, 'I. setosa'],
  317. [4.5, 2.3, 1.3, 0.3, 'I. setosa'],
  318. [4.4, 3.2, 1.3, 0.2, 'I. setosa'],
  319. [5.0, 3.5, 1.6, 0.6, 'I. setosa'],
  320. [5.1, 3.8, 1.9, 0.4, 'I. setosa'],
  321. [4.8, 3.0, 1.4, 0.3, 'I. setosa'],
  322. [5.1, 3.8, 1.6, 0.2, 'I. setosa'],
  323. [5.9, 3.0, 5.1, 1.8, 'I. virginica']
  324. ]
  325.  
  326.  
  327. def first(trainingData):
  328. return trainingData[:int(len(trainingData)/2)]
  329.  
  330.  
  331. def second(trainingData):
  332. return trainingData[int(len(trainingData)/2):]
  333.  
  334.  
  335. if __name__ == "__main__":
  336. att1 = input()
  337. att2 = input()
  338. att3 = input()
  339. att4 = input()
  340. planttype = input()
  341. testCase = [att1, att2, att3, att4, planttype]
  342. t1 = first(trainingData)
  343. t2 = second(trainingData)
  344.  
  345. tree1 = buildtree(t1)
  346. tree2 = buildtree(t2)
  347.  
  348. c1 = classify(testCase, tree1)
  349. c2 = classify(testCase, tree2)
  350.  
  351. if c1.keys() != c2.keys():
  352. print("KONTRADIKCIJA")
  353. else:
  354. for key,value in c1.items():
  355. print key
Add Comment
Please, Sign In to add comment