Advertisement
Guest User

Untitled

a guest
Jun 24th, 2019
72
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.89 KB | None | 0 0
  def vif_cal_iter(inputdata, vif_threshold, first_xvar_index=2):
      """Drop high-VIF predictor columns one at a time until none exceeds the threshold.

      On every pass each predictor column is regressed on all the other
      predictor columns, its variance inflation factor ``1 / (1 - R^2)`` is
      computed, and the column with the largest VIF above ``vif_threshold`` is
      dropped. The loop stops when no column exceeds the threshold or when
      fewer than two predictors remain.

      Args:
          inputdata: Spark DataFrame holding the predictors. Columns located
              before ``first_xvar_index`` are never scored, never used as
              features and never dropped (presumably id/target columns --
              confirm against the caller).
          vif_threshold: Largest VIF a predictor may have and still be kept.
          first_xvar_index: Positional index of the first predictor column.
              Defaults to 2, the start index the original loop used.

      Returns:
          The DataFrame with the offending predictor columns removed.
      """

      def vif_cal(inputdata, xvar_names, vif_threshold):
          """Return ``(vif, column_index)`` of the worst predictor above the threshold.

          ``column_index`` is ``None`` (and ``vif`` equals ``vif_threshold``)
          when no predictor exceeds the threshold.
          """
          vif_max = vif_threshold
          colnum_max = None
          for i in range(first_xvar_index, len(xvar_names)):
              # Label = column i, features = every *other* predictor column.
              # The earlier slice (x[3:i] + x[i+2:]) silently left column 2 and
              # column i+1 out of the regression. `i=i` pins the loop variable
              # so the lazily evaluated closure cannot pick up a later value.
              train_t = inputdata.rdd.map(
                  lambda x, i=i: [
                      Vectors.dense(x[first_xvar_index:i] + x[i + 1:]),
                      x[i],
                  ]
              ).toDF(['features', 'label'])
              # NOTE(review): maxIter=2 is kept from the original; it should
              # only matter if Spark selects an iterative solver -- confirm.
              lr = LinearRegression(
                  featuresCol='features', labelCol='label', maxIter=2
              )
              lr_model = lr.fit(train_t)
              r_sq = lr_model.summary.r2
              # A perfectly collinear column has R^2 == 1; treat its VIF as
              # infinite instead of dividing by zero.
              vif = float('inf') if r_sq >= 1 else 1 / (1 - r_sq)
              if vif_max < vif:
                  vif_max = vif
                  colnum_max = i
          return vif_max, colnum_max

      while True:
          # Re-read the column list on every pass: it shrinks after each drop,
          # and a stale list would index past the end of the rows.
          xvar_names = inputdata.columns
          # A VIF regression needs at least one other predictor as a feature.
          if len(xvar_names) - first_xvar_index < 2:
              return inputdata
          vif_max, colnum_max = vif_cal(inputdata, xvar_names, vif_threshold)
          if colnum_max is None:
              # Nothing exceeds the threshold any more.
              return inputdata
          print("Start of If Block")
          inputdata = inputdata.drop(xvar_names[colnum_max])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement