Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def compute_info_gain(data, att_idx):
    """Return the information gain of splitting ``data`` on one attribute.

    Computes::

        gain = compute_entropy(data)
               - sum over each distinct attribute value v of
                 (rows with value v / total rows) * compute_entropy(subset_v)

    Args:
        data: Dataset object. Assumed to expose ``attrs`` such that
            ``data.attrs[att_idx]`` is the sequence of that attribute's
            value for every row, plus a ``get_subset`` method -- TODO
            confirm against the dataset class.
        att_idx: Index of the attribute to split on.

    Returns:
        The entropy of ``data`` minus the size-weighted entropy of the
        branches produced by grouping rows on the attribute's value.
    """
    # NOTE(review): the original read ``self.attrs`` here, but this function
    # has no ``self``; ``data`` is the only dataset in scope. Confirm that
    # ``attrs`` is the right attribute name on the dataset class.
    column = data.attrs[att_idx]
    total = len(column)

    # Group row indices by attribute value. Only indices are stored: the
    # original also appended the values themselves to these lists, which
    # corrupted the index lists handed to get_subset.
    rows_by_value = {}
    for row_idx, value in enumerate(column):
        rows_by_value.setdefault(value, []).append(row_idx)

    weighted_entropy = 0.0
    for row_indices in rows_by_value.values():
        # float() keeps the fraction from truncating to 0 under Python 2.
        weight = float(len(row_indices)) / total
        # NOTE(review): call shape kept from the original. Passing ``data``
        # both as the receiver and as the first argument looks suspicious;
        # if get_subset is a plain instance method this should probably be
        # ``data.get_subset(row_indices)`` -- verify its signature.
        branch = data.get_subset(data, row_indices)
        weighted_entropy += weight * compute_entropy(branch)

    return compute_entropy(data) - weighted_entropy
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement