Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from math import log
def information_value(feature, target, num_bucket=10):
    """Return the information value (IV) of a numeric feature for a binary target.

    The feature is cut into up to ``num_bucket`` quantile buckets. For each
    bucket the mean of ``target`` is clipped to [0.001, 0.999] so the
    logarithms stay finite, and the bucket contributes
    ``(a - b) * (ln(a) - ln(b)) * share``, where ``a`` is the bucket rate over
    the overall rate, ``b`` is the same ratio for the complementary rate, and
    ``share`` is the bucket's fraction of all rows.

    Args:
        feature: 1-D numeric values (pandas Series, NumPy array or sequence).
        target: 1-D values whose mean is a rate, e.g. 0/1 labels, one per
            feature row.
        num_bucket: Number of quantile buckets requested from ``pd.qcut``;
            duplicate bin edges are dropped, so fewer buckets may result.

    Returns:
        The sum of the per-bucket contributions as a float.

    Notes:
        * Rows are paired by index label only when both inputs are pandas
          Series and every feature label is present in the target index.
          Otherwise, when the lengths are equal, rows are paired by position.
        * Rows whose feature is NaN fall into no bucket and add no bucket
          term, but they still count towards the overall rate and row total.
        * If ``target`` is constant the overall rate is 0 or 1, the ratios
          divide by zero and the result is not finite.
    """
    data = pd.DataFrame({'tr': target})
    buckets = pd.qcut(feature, q=num_bucket, duplicates='drop')

    if isinstance(buckets, pd.Series):
        labels_match = (
            isinstance(target, pd.Series)
            and buckets.index.isin(data.index).all()
        )
        if not labels_match and len(buckets) == len(data):
            # Assigning a Series aligns on index labels; with unrelated
            # indexes that yields NaN buckets or pairs the wrong rows.
            # Strip the index so the rows are paired by position instead.
            buckets = buckets.array
    data['bucket'] = buckets

    overall_rate = data['tr'].mean()
    total_rows = len(data)

    # observed=True: only buckets that actually contain rows are produced.
    # Empty bins would contribute NaN, which the final sum skips anyway.
    grouped = data.groupby('bucket', observed=True)['tr']
    bucket_rate = grouped.mean().clip(lower=0.001, upper=0.999)
    bucket_share = grouped.size() / total_rows

    a = bucket_rate / overall_rate
    b = (1 - bucket_rate) / (1 - overall_rate)
    contributions = (a - b) * (np.log(a) - np.log(b)) * bucket_share
    return contributions.sum()
# Information value of the sixth column of X_train (label X_train.columns[5])
# with y as the target. The result is not assigned to a name.
# NOTE(review): pd.Series(y) gets a default 0..n-1 index when y is not already
# a Series; confirm its rows line up with X_train's rows, since pandas aligns
# on index labels when Series are combined.
- information_value(X_train[X_train.columns[5]], pd.Series(y))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement