Advertisement
Guest User

Untitled

a guest
Oct 20th, 2017
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.84 KB | None | 0 0
  def compute_info_gain(data, att_idx):
      """Return the information gain obtained by splitting ``data`` on one attribute.

      Computed as::

          compute_entropy(data)
              - SUM over attribute values v of
                (rows with value v / total rows) * compute_entropy(branch for v)

      Args:
          data: Dataset object. This function reads ``data.attrs[att_idx]`` as a
              sequence with one attribute value per row, and calls
              ``data.get_subset`` to build the branch for each distinct value.
          att_idx: Index into ``data.attrs`` selecting the attribute to split on.

      Returns:
          The information gain as a float. If the attribute column is empty
          there are no branches, so the result is just ``compute_entropy(data)``.
      """
      # The earlier version read ``self.attrs`` here, but this is a plain
      # function with no ``self``; ``data`` is the only dataset in scope.
      column = data.attrs[att_idx]

      # Group ROW INDICES (not the attribute values themselves) by attribute
      # value, so each branch can be cut out of the dataset by position.
      rows_by_value = {}
      for row_idx, value in enumerate(column):
          rows_by_value.setdefault(value, []).append(row_idx)

      # Use a float denominator so the branch weight is a true fraction even
      # where ``/`` on two ints would truncate (Python 2).
      total_rows = float(len(column))

      weighted_entropy = 0.0
      for row_idxs in rows_by_value.values():
          weight = len(row_idxs) / total_rows
          # NOTE(review): call shape kept from the original, which passes
          # ``data`` explicitly as the first argument -- confirm against the
          # actual signature of ``get_subset``.
          branch = data.get_subset(data, row_idxs)
          weighted_entropy += weight * compute_entropy(branch)

      return compute_entropy(data) - weighted_entropy
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement