# Pastebin export header (not code): diff view between Paste ID BnzmAWLc and eB6cZsqt.
# Candidate pipeline steps: each is a (name, estimator) tuple for sklearn's Pipeline.
# NOTE(review): N_features, features, labels and the sklearn imports (Pipeline,
# GridSearchCV, KFold, PCA, SVC, KMeans, DecisionTreeClassifier, feature_selection)
# are assumed to be defined earlier in this file — confirm before running standalone.
# N_features // 2 keeps n_components an integer: a float in (0, 1) is interpreted
# by PCA as an explained-variance target, not a component count.
pca = ('reduce_dim', PCA(n_components=N_features // 2))
svc = ('svc', SVC(kernel='poly', C=10, degree=4, max_iter=10000000))
kmeans = ('kmeans', KMeans(n_clusters=5))
tree = ('tree', DecisionTreeClassifier())
transform = ('anova', feature_selection.SelectPercentile(feature_selection.f_classif, percentile=50))

# Hyper-parameter grid for GridSearchCV; keys use the "<step name>__<param>" convention.
# Integer division (//) for the same reason as above.
parameters = {
    "reduce_dim__n_components": [N_features // 2, N_features // 3, N_features // 4],
    "kmeans__n_clusters": [3, 6, 8, 10],
}

# Steps actually wired into the pipeline (svc and transform are built above but
# intentionally left out of this configuration).
estimators = [
    pca,
    kmeans,
    tree,
]
pipe = Pipeline(estimators)
# shuffle=True is required when random_state is passed to KFold; recent
# scikit-learn versions raise a ValueError for random_state with shuffle=False.
grid = GridSearchCV(pipe, param_grid=parameters, n_jobs=-1,
                    cv=KFold(n_splits=3, shuffle=True, random_state=42))
clf = grid

# Example starting point. Try investigating other evaluation techniques!
from sklearn.model_selection import train_test_split
features_train, features_test, labels_train, labels_test = \
    train_test_split(features, labels, test_size=0.3, random_state=42, shuffle=False)

# Fit the grid search on the training split only — fitting on the full data set
# (as the original did) leaks the held-out test rows into model selection.
clf.fit(features_train, labels_train)
# Mean cross-validated score of the best parameter combination.
print(clf.best_score_)