Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Grid search over interchangeable preprocessing and classifier steps,
# scored by ROC AUC.
#
# NOTE(review): `X` and `y` are not defined in this snippet. The REPL output
# recorded further down in this paste shows X.shape == (13885, 23) and
# y.shape == (13885,).
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from pipelinehelper.pipelinehelper import PipelineHelper

# Candidate preprocessing steps, keyed by the short names used as prefixes
# in the parameter grid below.
# NOTE(review): PipelineHelper presumably lets the search pick one candidate
# per fit via `selected_model` -- confirm against the pipelinehelper docs.
scaler_candidates = PipelineHelper([
    ('std', StandardScaler()),
    ('abs', MaxAbsScaler()),
    ('minmax', MinMaxScaler()),
    ('pca', PCA(svd_solver='full', whiten=True)),
])

# Candidate classifiers, wrapped the same way.
classifier_candidates = PipelineHelper([
    ('knn', KNeighborsClassifier(weights='distance')),
    ('gbc', GradientBoostingClassifier()),
])

pipe = Pipeline(steps=[
    ('scaler', scaler_candidates),
    ('classifier', classifier_candidates),
])

# Per-candidate hyper-parameters; 'abs' and 'minmax' get no entries here.
scaler_options = {
    'std__with_mean': [True, False],
    'std__with_std': [True, False],
    'pca__n_components': [0.5, 0.75, 0.9, 0.99],
}
classifier_options = {
    # Larger neighbourhood sizes (30, 50, ..., 190) are left out of the grid.
    'knn__n_neighbors': [1, 3, 5, 7, 10],
    'gbc__learning_rate': [0.1, 0.5, 1.0],
    'gbc__subsample': [0.5, 1.0],
}

# The helpers are looked up through the pipeline's named steps and asked to
# expand the option dicts into values for their `selected_model` parameter.
params = {
    'scaler__selected_model':
        pipe.named_steps['scaler'].generate(scaler_options),
    'classifier__selected_model':
        pipe.named_steps['classifier'].generate(classifier_options),
}

# NOTE(review): the traceback recorded later in this paste
# (ValueError: bad input shape (), raised from roc_curve) appears to come
# from this fit call -- confirm before relying on the results.
grid = GridSearchCV(
    estimator=pipe,
    param_grid=params,
    scoring='roc_auc',
    n_jobs=1,
    verbose=1,
    cv=5,
)
grid.fit(X, y)
- >>> X.shape
- ... (13885, 23)
- >>> y.shape
- ... (13885,)
- >>> X
- ... array([[ 0. , 0. , 0. , ..., 7.14285714,
- 0.9 , 35.4644354 ],
- [ 0. , 0. , 0. , ..., 2.11442806,
- 1.2 , 54.99027913],
- [ 1. , 0. , 0. , ..., 2.64959194,
- 0.7 , 70.07380534],
- ...,
- [ 1. , 0. , 0. , ..., 4.375 ,
- 0.5 , 91.85932945],
- [ 1. , 0. , 0. , ..., 3.75 ,
- 0.9 , 68.62436682],
- [ 0. , 0. , 1. , ..., 3.01587302,
- 4.1 , 57.25781074]])
- >>> y
- ... array([0, 0, 0, ..., 0, 0, 1])
- >>> y.mean()
- ... 0.11278357940223263
- >>> sklearn.__version__
- '0.20.3'
- python3.7/site-packages/sklearn/metrics/ranking.py in roc_curve(y_true, y_score, pos_label, sample_weight, drop_intermediate)
- 616 """
- 617 fps, tps, thresholds = _binary_clf_curve(
- --> 618 y_true, y_score, pos_label=pos_label, sample_weight=sample_weight)
- 619
- 620 # Attempt to drop thresholds corresponding to points in between and
- python3.7/site-packages/sklearn/metrics/ranking.py in _binary_clf_curve(y_true, y_score, pos_label, sample_weight)
- 399 check_consistent_length(y_true, y_score, sample_weight)
- 400 y_true = column_or_1d(y_true)
- --> 401 y_score = column_or_1d(y_score)
- 402 assert_all_finite(y_true)
- 403 assert_all_finite(y_score)
- python3.7/site-packages/sklearn/utils/validation.py in column_or_1d(y, warn)
- 795 return np.ravel(y)
- 796
- --> 797 raise ValueError("bad input shape {0}".format(shape))
- 798
- 799
- ValueError: bad input shape ()
# Reduced setup on synthetic data: a plain Pipeline with a single scaler and
# a single classifier, with no PipelineHelper wrapper.
# Pipeline, StandardScaler and GradientBoostingClassifier are the names
# imported by the first snippet of this paste.
from sklearn.datasets import make_classification

# 100 samples with 23 features each.
X_test, y_test = make_classification(n_samples=100, n_features=23)

pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('classifier', GradientBoostingClassifier()),
])

# Parameter grid addressed directly as <step name>__<parameter>.
params = dict(
    scaler__with_mean=[True, False],
    scaler__with_std=[True, False],
    classifier__learning_rate=[0.1, 0.5, 1.0],
    classifier__subsample=[0.5, 1.0],
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement