SHARE
TWEET

Untitled

a guest Jun 27th, 2019 70 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Grid search over interchangeable preprocessing steps and classifiers.
  #
  # Both pipeline steps are PipelineHelper instances wrapping several named
  # candidate estimators.  The grid for each step's ``selected_model``
  # parameter is whatever that helper's ``generate()`` returns for the
  # per-candidate parameter lists given below (presumably one entry per
  # candidate/parameter combination -- confirm against the pipelinehelper docs).
  #
  # ``X`` and ``y`` are not defined here and must already exist; the transcript
  # recorded with this script shows shapes (13885, 23) and (13885,).
  #
  # NOTE(review): the traceback recorded with this script shows ``roc_curve``
  # rejecting ``y_score`` with "bad input shape ()", i.e. the 'roc_auc' scorer
  # did not receive per-sample scores.  Presumably the PipelineHelper
  # 'classifier' step is not forwarding scores from the selected model --
  # TODO confirm against the installed pipelinehelper version.
  from sklearn.pipeline import Pipeline
  from sklearn.model_selection import GridSearchCV
  from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
  from sklearn.decomposition import PCA
  from sklearn.neighbors import KNeighborsClassifier
  from sklearn.ensemble import GradientBoostingClassifier
  from pipelinehelper.pipelinehelper import PipelineHelper

  pipe = Pipeline([
      # Candidate preprocessing steps; PCA is offered as an alternative to
      # the three scalers, not in addition to them.
      ('scaler', PipelineHelper([
          ('std', StandardScaler()),
          ('abs', MaxAbsScaler()),
          ('minmax', MinMaxScaler()),
          ('pca', PCA(svd_solver='full', whiten=True)),
      ])),
      # Candidate classifiers.
      ('classifier', PipelineHelper([
          ('knn', KNeighborsClassifier(weights='distance')),
          ('gbc', GradientBoostingClassifier()),
      ])),
  ])

  # Parameter keys are prefixed with the candidate's name inside its helper
  # ('std__', 'pca__', 'knn__', 'gbc__').
  params = {
      'scaler__selected_model': pipe.named_steps['scaler'].generate({
          'std__with_mean': [True, False],
          'std__with_std': [True, False],
          'pca__n_components': [0.5, 0.75, 0.9, 0.99],
      }),
      'classifier__selected_model': pipe.named_steps['classifier'].generate({
          'knn__n_neighbors': [1, 3, 5, 7, 10],
          'gbc__learning_rate': [0.1, 0.5, 1.0],
          'gbc__subsample': [0.5, 1.0],
      }),
  }

  # 5-fold cross-validated search scored by ROC AUC, run in a single process.
  grid = GridSearchCV(pipe, params, scoring='roc_auc', n_jobs=1, verbose=1, cv=5)
  grid.fit(X, y)
  36.      
  37. >>> X.shape
  38. ... (13885, 23)
  39. >>> y.shape
  40. ... (13885,)
  41. >>> X
  42. ... array([[ 0.        ,  0.        ,  0.        , ...,  7.14285714,
  43.          0.9       , 35.4644354 ],
  44.        [ 0.        ,  0.        ,  0.        , ...,  2.11442806,
  45.          1.2       , 54.99027913],
  46.        [ 1.        ,  0.        ,  0.        , ...,  2.64959194,
  47.          0.7       , 70.07380534],
  48.        ...,
  49.        [ 1.        ,  0.        ,  0.        , ...,  4.375     ,
  50.          0.5       , 91.85932945],
  51.        [ 1.        ,  0.        ,  0.        , ...,  3.75      ,
  52.          0.9       , 68.62436682],
  53.        [ 0.        ,  0.        ,  1.        , ...,  3.01587302,
  54.          4.1       , 57.25781074]])
  55. >>> y
  56. ... array([0, 0, 0, ..., 0, 0, 1])
  57. >>> y.mean()
  58. ... 0.11278357940223263
  59. >>> sklearn.__version__
  60. '0.20.3'
  61.      
  62. python3.7/site-packages/sklearn/metrics/ranking.py in roc_curve(y_true, y_score, pos_label, sample_weight, drop_intermediate)
  63.     616     """
  64.     617     fps, tps, thresholds = _binary_clf_curve(
  65. --> 618         y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
  66.     619
  67.     620     # Attempt to drop thresholds corresponding to points in between and
  68.  
  69. python3.7/site-packages/sklearn/metrics/ranking.py in _binary_clf_curve(y_true, y_score, pos_label, sample_weight)
  70.     399     check_consistent_length(y_true, y_score, sample_weight)
  71.     400     y_true = column_or_1d(y_true)
  72. --> 401     y_score = column_or_1d(y_score)
  73.     402     assert_all_finite(y_true)
  74.     403     assert_all_finite(y_score)
  75.  
  76. python3.7/site-packages/sklearn/utils/validation.py in column_or_1d(y, warn)
  77.     795         return np.ravel(y)
  78.     796
  79. --> 797     raise ValueError("bad input shape {0}".format(shape))
  80.     798
  81.     799
  82.  
  83. ValueError: bad input shape ()
  84.      
  # Comparison setup without PipelineHelper: a plain scaler + classifier
  # pipeline with an ordinary parameter grid, plus synthetic data.
  #
  # Relies on Pipeline, StandardScaler and GradientBoostingClassifier having
  # been imported already.  Rebinds the module-level names ``pipe`` and
  # ``params``.
  from sklearn.datasets import make_classification

  # 100 synthetic samples with 23 features, the same feature count as the X
  # shown in the transcript.  No random_state is passed, so the generated data
  # will presumably differ between runs.
  X_test, y_test = make_classification(n_samples=100, n_features=23)

  pipe = Pipeline([
      ('scaler', StandardScaler()),
      ('classifier', GradientBoostingClassifier()),
  ])

  # Keys address the step parameters directly as '<step>__<parameter>'.
  params = {
      'scaler__with_mean': [True, False],
      'scaler__with_std': [True, False],
      'classifier__learning_rate': [0.1, 0.5, 1.0],
      'classifier__subsample': [0.5, 1.0],
  }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top