Advertisement
Guest User

Untitled

a guest
Jun 27th, 2019
116
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.40 KB | None | 0 0
  1. from sklearn.pipeline import Pipeline
  2. from sklearn.model_selection import GridSearchCV
  3. from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
  4. from sklearn.decomposition import PCA
  5. from sklearn.neighbors import KNeighborsClassifier
  6. from sklearn.ensemble import GradientBoostingClassifier
  7. from pipelinehelper.pipelinehelper import PipelineHelper
  8.  
  # Two-step pipeline. Each step is a PipelineHelper wrapping several named
  # candidate estimators rather than a single fixed estimator.
  9. pipe = Pipeline([
  # Step 'scaler': candidate preprocessors. Note that 'pca' (imported from
  # sklearn.decomposition) is listed alongside the three scalers.
  10. ('scaler', PipelineHelper([
  11. ('std', StandardScaler()),
  12. ('abs', MaxAbsScaler()),
  13. ('minmax', MinMaxScaler()),
  14. ('pca', PCA(svd_solver='full', whiten=True)),
  15. ])),
  # Step 'classifier': candidate models, distance-weighted k-NN or gradient
  # boosting with default settings.
  16. ('classifier', PipelineHelper([
  17. ('knn', KNeighborsClassifier(weights='distance')),
  18. ('gbc', GradientBoostingClassifier())
  19. ])),
  20. ])
  # Search space for the pipeline above. Each '<step>__selected_model' entry
  # is produced by that step's PipelineHelper.generate(), which is given value
  # lists keyed as '<candidate name>__<parameter>'.
  # NOTE(review): what generate() returns is not visible here; presumably one
  # entry per candidate/parameter combination -- confirm in pipelinehelper.
  21. params = {
  # Only 'std' and 'pca' receive value lists; 'abs' and 'minmax' have none.
  22. 'scaler__selected_model': pipe.named_steps['scaler'].generate({
  23. 'std__with_mean': [True, False],
  24. 'std__with_std': [True, False],
  25. 'pca__n_components': [0.5, 0.75, 0.9, 0.99],
  26. }),
  27. 'classifier__selected_model': pipe.named_steps['classifier'].generate({
  # The trailing comment on the next line holds larger neighbor counts that
  # are currently commented out of the list.
  28. 'knn__n_neighbors': [1, 3, 5, 7, 10],#, 30, 50, 70, 90, 110, 130, 150, 170, 190],
  29. 'gbc__learning_rate': [0.1, 0.5, 1.0],
  30. 'gbc__subsample': [0.5, 1.0],
  31. })
  32. }
  33.  
  # Grid search over `params`, scored with 'roc_auc', using cv=5 and a single
  # job (n_jobs=1); verbose=1 enables progress output.
  34. grid = GridSearchCV(pipe, params, scoring='roc_auc', n_jobs=1, verbose=1, cv=5)
  # X and y are not defined in this snippet; the interpreter transcript below
  # shows X.shape == (13885, 23) and y.shape == (13885,).
  # NOTE(review): the traceback pasted further down (ValueError: bad input
  # shape () raised via roc_curve) presumably originates from this call --
  # confirm; the frames that would show the caller are not included.
  35. grid.fit(X, y)
  36.  
  37. >>> X.shape
  38. ... (13885, 23)
  39. >>> y.shape
  40. ... (13885,)
  41. >>> X
  42. ... array([[ 0. , 0. , 0. , ..., 7.14285714,
  43. 0.9 , 35.4644354 ],
  44. [ 0. , 0. , 0. , ..., 2.11442806,
  45. 1.2 , 54.99027913],
  46. [ 1. , 0. , 0. , ..., 2.64959194,
  47. 0.7 , 70.07380534],
  48. ...,
  49. [ 1. , 0. , 0. , ..., 4.375 ,
  50. 0.5 , 91.85932945],
  51. [ 1. , 0. , 0. , ..., 3.75 ,
  52. 0.9 , 68.62436682],
  53. [ 0. , 0. , 1. , ..., 3.01587302,
  54. 4.1 , 57.25781074]])
  55. >>> y
  56. ... array([0, 0, 0, ..., 0, 0, 1])
  57. >>> y.mean()
  58. ... 0.11278357940223263
  59. >>> sklearn.__version__
  60. '0.20.3'
  61.  
  62. python3.7/site-packages/sklearn/metrics/ranking.py in roc_curve(y_true, y_score, pos_label, sample_weight, drop_intermediate)
  63. 616 """
  64. 617 fps, tps, thresholds = _binary_clf_curve(
  65. --> 618 y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
  66. 619
  67. 620 # Attempt to drop thresholds corresponding to points in between and
  68.  
  69. python3.7/site-packages/sklearn/metrics/ranking.py in _binary_clf_curve(y_true, y_score, pos_label, sample_weight)
  70. 399 check_consistent_length(y_true, y_score, sample_weight)
  71. 400 y_true = column_or_1d(y_true)
  72. --> 401 y_score = column_or_1d(y_score)
  73. 402 assert_all_finite(y_true)
  74. 403 assert_all_finite(y_score)
  75.  
  76. python3.7/site-packages/sklearn/utils/validation.py in column_or_1d(y, warn)
  77. 795 return np.ravel(y)
  78. 796
  79. --> 797 raise ValueError("bad input shape {0}".format(shape))
  80. 798
  81. 799
  82.  
  83. ValueError: bad input shape ()
  84.  
  85. from sklearn.datasets import make_classification
  # Synthetic data set: 100 samples with 23 features, the same feature count
  # as the X shown in the transcript above. No random_state is passed.
  # NOTE(review): X_test / y_test are not used in the lines visible here.
  86. X_test, y_test = make_classification(100, 23)
  87.  
  # Variant without PipelineHelper: one fixed scaler followed by one fixed
  # classifier. This rebinds the name `pipe` used by the earlier snippet.
  88. pipe = Pipeline([
  89. ('scaler', StandardScaler()),
  90. ('classifier', GradientBoostingClassifier()),
  91. ])
  # Grid keyed directly as '<step>__<parameter>'. The value lists are the same
  # ones given for 'std' and 'gbc' in the PipelineHelper version above.
  # NOTE(review): the paste ends after this dict; no GridSearchCV / fit call
  # is shown for this variant.
  92. params = {
  93. 'scaler__with_mean': [True, False],
  94. 'scaler__with_std': [True, False],
  95. 'classifier__learning_rate': [0.1, 0.5, 1.0],
  96. 'classifier__subsample': [0.5, 1.0],
  97. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement