# NOTE(review): pastebin export header removed; the script begins below.
# Build a Pipeline (PCA -> KMeans -> DecisionTree) and tune it with GridSearchCV.
# NOTE(review): assumes `features`, `labels`, `N_features`, and the sklearn
# estimator classes are defined/imported earlier in the file -- confirm.

# n_components must be an integer: use floor division, not true division
# (N_features / 2 is a float on Python 3 and PCA rejects it).
pca = ('reduce_dim', PCA(n_components=N_features // 2))
svc = ('svc', SVC(kernel='poly', C=10, degree=4, max_iter=10000000))
kmeans = ('kmeans', KMeans(n_clusters=5))
tree = ('tree', DecisionTreeClassifier())
transform = ('anova', feature_selection.SelectPercentile(
    feature_selection.f_classif, percentile=50))

# Grid keys follow Pipeline's "<step_name>__<param>" convention.
parameters = {
    "reduce_dim__n_components": [N_features // 2, N_features // 3, N_features // 4],
    "kmeans__n_clusters": [3, 6, 8, 10],
}

estimators = [
    pca,     # dimensionality reduction
    kmeans,  # cluster-distance features via KMeans.transform
    tree,    # final classifier
]
pipe = Pipeline(estimators)
# shuffle=True is required for random_state to take effect; modern sklearn
# raises ValueError for KFold(random_state=...) with shuffle=False.
grid = GridSearchCV(pipe, param_grid=parameters, n_jobs=-1,
                    cv=KFold(n_splits=3, shuffle=True, random_state=42))
clf = grid


# Example starting point. Try investigating other evaluation techniques!
from sklearn.model_selection import train_test_split
# shuffle defaults to True; with shuffle=False the random_state was a no-op.
features_train, features_test, labels_train, labels_test = \
    train_test_split(features, labels, test_size=0.3, random_state=42)

# Fit ONLY on the training split so the held-out test set stays unseen
# (the original fit on all of `features`, leaking the test data).
clf.fit(features_train, labels_train)
print(clf.best_score_)                        # best mean CV score over the grid
print(clf.score(features_test, labels_test))  # generalization estimate on held-out data