Advertisement
Guest User

Untitled

a guest
Apr 16th, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.68 KB | None | 0 0
  1. from math import log
  2.  
  3. def information_value(feature, target, num_bucket=10):
  4.  
  5.     data = pd.DataFrame({'tr': target})
  6.     data['bucket'] = pd.qcut(feature, q=num_bucket, duplicates='drop')
  7.     data['cnt'] = 1
  8.  
  9.     all_tr = data['tr'].mean()
  10.     all_cnt = data['cnt'].sum()
  11.    
  12.     data_agg = data.groupby('bucket').agg({'tr': 'mean', 'cnt': 'sum'})
  13.  
  14.     data_agg['tr'] = np.clip(data_agg['tr'], 0.001, 0.999)
  15.     a = data_agg['tr']/all_tr
  16.     b = (1 - data_agg['tr'])/(1-all_tr)
  17.     mult = data_agg['cnt']/all_cnt
  18.  
  19.     data_agg['iv'] = (a - b)*(np.log(a) - np.log(b)) * mult
  20.  
  21.     return data_agg['iv'].sum()
  22.  
  23. information_value(X_train[X_train.columns[5]], pd.Series(y))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement