Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
#!/usr/bin/env python
# coding: utf-8

# k-NN classification on the UCI Ionosphere dataset (notebook export).

import os
import numpy as np
import csv

home_dir = os.path.expanduser("~")
print(home_dir)

# BUG FIX: the original used the Windows-only literal ".\ionosphere.data.txt"
# (a backslash path separator); os.path.join builds the relative path portably.
data_filename = os.path.join(".", "ionosphere.data.txt")
print(data_filename)

# Pre-allocate: 351 samples x 34 float features, plus a boolean target
# (set to True later when the class label is 'g').
X = np.zeros((351, 34), dtype='float')
y = np.zeros((351,), dtype='bool')
# Parse the CSV file: the first 34 columns are float features, the last
# column is the class label ('g' for good, 'b' for bad).
# BUG FIX: the pasted source had lost all indentation inside the `with`
# block and the `for` loop, making it a SyntaxError; structure restored.
with open(data_filename, 'r') as input_file:
    reader = csv.reader(input_file)
    for i, row in enumerate(reader):
        # Get the data, converting each item to a float
        data = [float(datum) for datum in row[:-1]]
        # Set the appropriate row in our dataset
        X[i] = data
        # 1 if the class is 'g', 0 otherwise
        y[i] = row[-1] == 'g'

print(X[1][-1])
from sklearn.model_selection import train_test_split

# Split off a held-out test set (sklearn's default 25%), fixed seed for
# reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=14)
print("There are {} samples in the training dataset".format(X_train.shape[0]))
print("There are {} samples in the testing dataset".format(X_test.shape[0]))
print("Each sample has {} features".format(X_train.shape[1]))

from sklearn.neighbors import KNeighborsClassifier

# k-NN classifier with default parameters; `estimator` is reused by the
# cross-validation cell further down.
estimator = KNeighborsClassifier()
print(estimator)
estimator.fit(X_train, y_train)

# Score the fitted model on the held-out set: fraction of correct
# predictions, expressed as a percentage.
y_predicted = estimator.predict(X_test)
accuracy = (y_test == y_predicted).mean() * 100
print("The accuracy is {0:.1f}%".format(accuracy))
# cross_val_score uses Stratified K-Fold for classification targets.
from sklearn.model_selection import cross_val_score

scores = cross_val_score(estimator, X, y, scoring='accuracy', cv=3)

# Sweep n_neighbors over 1..20 (inclusive) and record the 3-fold
# cross-validated accuracy for each setting; `parameter_values`,
# `avg_scores` and `all_scores` are plotted by the next cell.
# BUG FIX: the pasted source had lost the for-loop indentation
# (SyntaxError); structure restored. Dead commented-out code removed.
avg_scores = []
all_scores = []
parameter_values = list(range(1, 21))  # Including 20
for n_neighbors in parameter_values:
    estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
    scores = cross_val_score(estimator, X, y, scoring='accuracy', cv=3)
    avg_scores.append(np.mean(scores))
    all_scores.append(scores)
# '%matplotlib inline' is an IPython magic; get_ipython() does not exist
# when this file runs as a plain script, so guard it.
# BUG FIX: the original called get_ipython() unconditionally, which
# raises NameError outside a notebook/IPython session.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

from matplotlib import pyplot as plt

# Accuracy as a function of the number of neighbours.
plt.plot(parameter_values, avg_scores, '-o')
# Simulate badly-scaled data: copy X and divide every second feature
# column by 10, then compare cross-validated accuracy on both versions.
x_broken = np.array(X)
x_broken[:, ::2] /= 10

# NOTE(review): 'esimator' is a typo for 'estimator', kept because the
# MinMaxScaler cells below reuse this exact name.
esimator = KNeighborsClassifier()

original_scores = cross_val_score(esimator, X, y, scoring='accuracy')
print("The original average accuracy for is {0:.1f}%".format(np.mean(original_scores) * 100))

broken_scores = cross_val_score(esimator, x_broken, y, scoring='accuracy')
print("The broken average accuracy for is {0:.1f}%".format(np.mean(broken_scores) * 100))
from sklearn.preprocessing import MinMaxScaler

# Rescale every feature to [0, 1]; k-NN is distance-based, so scaling
# restores the weight of the columns that were divided by 10 above.
x_transformed = MinMaxScaler().fit_transform(x_broken)
estimator = KNeighborsClassifier()
# BUG FIX: the original created `estimator` but then scored the stale
# `esimator` from the previous cell, leaving the new object unused.
# BUG FIX: this cell was duplicated verbatim (In[59] and In[60]);
# the copy-paste duplicate has been removed.
transformed_scores = cross_val_score(estimator, x_transformed, y, scoring='accuracy')
print("The average accuracy for is {0:.1f}%".format(np.mean(transformed_scores) * 100))
from sklearn.pipeline import Pipeline

# Chain scaling and classification so that, under cross-validation, the
# scaler is fit only on each training fold (no test-fold leakage).
# BUG FIX: the original assigned a bare list of steps — the Pipeline(...)
# constructor call was missing, so cross_val_score would reject it.
scaling_pipeline = Pipeline([
    ('scale', MinMaxScaler()),
    ('predict', KNeighborsClassifier()),
])
print(scaling_pipeline)

scores = cross_val_score(scaling_pipeline, x_broken, y, scoring='accuracy')
# BUG FIX: the original printed np.mean(transformed_scores) — the result
# of the previous cell — instead of the pipeline's own `scores`.
print("the pipeline scored an average accuracy for is {0:.1f}%".format(np.mean(scores) * 100))
Add a comment
Please sign in to add a comment.