Guest User

Untitled

a guest
Dec 16th, 2017
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.93 KB | None | 0 0
  1. from statsmodels.stats.outliers_influence import variance_inflation_factor
  2.  
  3. vif_threshold = 10
  4. all_feat = lr_model_feats
  5. max_vif_value = np.Inf
  6. max_vif_feature = None
  7. iter_num = 0
  8. while max_vif_value > vif_threshold:
  9. iter_num += 1
  10. t_start = - time.time()
  11. print('-' * 100)
  12. print(max_vif_value, max_vif_feature, len(all_feat))
  13. if max_vif_feature is not None:
  14. all_feat = [t for t in all_feat if t != max_vif_feature]
  15. vif = pd.DataFrame()
  16. X = calc_df[all_feat]
  17. vif["vif_value"] = [variance_inflation_factor(X.fillna(0).values, i) for i in range(X.shape[1])]
  18. vif['feature_name'] = X.columns
  19. max_vif_value = vif.vif_value.max()
  20. max_vif_feature = vif.sort_values("vif_value", ascending=False).head(1).feature_name.values[0]
  21. t_cost = time.time() + t_start
  22. print("---- iter %s: cost %s" % (iter_num, t_cost))
  23.  
  24. print("done!")
  25. print(max_vif_value, max_vif_feature)
  26. print(len(all_feat))
  27. print(all_feat)
Add Comment
Please, Sign In to add comment