Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# SelectPercentile is parameterised by a percentage of features to keep, not
# by a feature count.  Build one percentile per target count i = 2..20, each
# expressed as i out of N_FEATURES_TOTAL.
# NOTE(review): 42 is presumably the number of columns in Xtrain_scaled --
# confirm against the data before reusing this grid on another dataset.
N_FEATURES_TOTAL = 42
n_comp = [100 * i / N_FEATURES_TOTAL for i in range(2, 21)]
# Each classifier is paired with the dict that collects its results and with
# the label of its feature-count entry, so one inner loop serves all three.
# The tuple order (lr, knn, lin_svc) is the order in which the models are fit.
_sp_runs = (
    (lr, results_logreg_sp, 'LogReg Num. features'),
    (knn, results_knn_sp, 'KNN Num. features'),
    (lin_svc, results_LinSVC_sp, 'Lin SVC Num. features'),
)

# For every percentile: fit the selector on the training data, reduce both the
# training and the validation matrices with it, then fit and score each model
# on the reduced data.
for n in n_comp:
    comp = n
    select_sp = SelectPercentile(percentile=comp)
    select_sp.fit(Xtrain_scaled, ytrain)
    X_r2 = select_sp.transform(Xtrain_scaled)
    # The validation data is only transformed, never used to fit the selector.
    X_r2_val = select_sp.transform(Xval_scaled)

    for _sp_model, _sp_results, _sp_label in _sp_runs:
        _sp_model.fit(X_r2, ytrain)
        # Results are keyed by percentile; the number of columns actually
        # kept is stored alongside the two scores.
        _sp_results[comp] = {
            _sp_label: X_r2.shape[1],
            'Training Score': _sp_model.score(X_r2, ytrain),
            'Valuation Score': _sp_model.score(X_r2_val, yval),
        }
# Turn each results dict into a table with one row per percentile, then index
# the rows by the recorded number of selected features.
res_logreg_sp = (
    pd.DataFrame(results_logreg_sp)
    .transpose()
    .set_index('LogReg Num. features')
)
res_knn_sp = (
    pd.DataFrame(results_knn_sp)
    .transpose()
    .set_index('KNN Num. features')
)
res_LinSVC_sp = (
    pd.DataFrame(results_LinSVC_sp)
    .transpose()
    .set_index('Lin SVC Num. features')
)

# Show the three tables together.
display_side_by_side(res_logreg_sp, res_knn_sp, res_LinSVC_sp)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement