Guest User

Untitled

a guest
Feb 17th, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.60 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3.  
  4. # In[64]:
  5.  
  6.  
  7. import os
  8. import numpy as np
  9. import csv
  10.  
  11.  
  12. # In[65]:
  13.  
  14.  
  15. home_dir = (os.path.expanduser("~"))
  16. print(home_dir)
  17.  
  18.  
  19. # In[66]:
  20.  
  21.  
  22. data_filename = ".\ionosphere.data.txt"
  23. print(data_filename)
  24.  
  25.  
  26. # In[67]:
  27.  
  28.  
  29. X = np.zeros((351, 34), dtype='float')
  30. y = np.zeros((351,), dtype='bool')
  31.  
  32.  
  33. # In[68]:
  34.  
  35.  
  36. with open(data_filename, 'r') as input_file:
  37. reader = csv.reader(input_file)
  38. for i, row in enumerate(reader):
  39. # Get the data, converting each item to a float
  40. data = [float(datum) for datum in row[:-1]]
  41. # Set the appropriate row in our dataset
  42. X[i] = data
  43. # 1 if the class is 'g', 0 otherwise
  44. y[i] = row[-1] == 'g'
  45.  
  46.  
  47. # In[69]:
  48.  
  49.  
  50. print(X[1][-1])
  51.  
  52.  
  53. # In[70]:
  54.  
  55.  
  56. from sklearn.model_selection import train_test_split
  57. X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=14)
  58. print("There are {} samples in the training dataset".format(X_train.shape[0]))
  59. print("There are {} samples in the testing dataset".format(X_test.shape[0]))
  60. print("Each sample has {} features".format(X_train.shape[1]))
  61.  
  62.  
  63. # In[71]:
  64.  
  65.  
  66. from sklearn.neighbors import KNeighborsClassifier
  67. estimator = KNeighborsClassifier()
  68. print(estimator)
  69.  
  70.  
  71. # In[72]:
  72.  
  73.  
  74. estimator.fit(X_train, y_train)
  75.  
  76.  
  77. # In[73]:
  78.  
  79.  
  80. y_predicted = estimator.predict(X_test)
  81. accuracy = np.mean(y_test == y_predicted) * 100
  82. print("The accuracy is {0:.1f}%".format(accuracy))
  83.  
  84.  
  85. # In[74]:
  86.  
  87.  
  88. # uses Stratified K-fold
  89. from sklearn.model_selection import cross_val_score
  90.  
  91.  
  92. # In[76]:
  93.  
  94.  
  95. scores = cross_val_score(estimator, X, y, scoring='accuracy', cv=3)
  96. #average_accuracy = np.mean(scores) * 100
  97. #print("The average accuracy is {0:.1f}%".format(average_accuracy))
  98.  
  99.  
  100. # In[77]:
  101.  
  102.  
  103. #setting parameters
  104. avg_scores = []
  105. all_scores = []
  106. parameter_values = list(range(1, 21)) # Including 20
  107. for n_neighbors in parameter_values:
  108. estimator = KNeighborsClassifier(n_neighbors=n_neighbors)
  109. scores = cross_val_score(estimator, X, y, scoring='accuracy', cv=3)
  110. avg_scores.append(np.mean(scores))
  111. all_scores.append(scores)
  112.  
  113.  
  114. # In[55]:
  115.  
  116.  
  117. get_ipython().run_line_magic('matplotlib', 'inline')
  118.  
  119.  
  120. # In[56]:
  121.  
  122.  
  123. from matplotlib import pyplot as plt
  124. plt.plot(parameter_values, avg_scores, '-o')
  125.  
  126.  
  127. # In[57]:
  128.  
  129.  
  130. x_broken = np.array(X)
  131. x_broken[:,::2] /=10
  132.  
  133.  
  134. # In[58]:
  135.  
  136.  
  137. esimator = KNeighborsClassifier()
  138. original_scores = cross_val_score(esimator, X, y, scoring='accuracy')
  139. print("The original average accuracy for is {0:.1f}%".format(np.mean(original_scores) * 100))
  140. broken_scores = cross_val_score(esimator, x_broken,y, scoring='accuracy')
  141. print("The broken average accuracy for is {0:.1f}%".format(np.mean(broken_scores) * 100))
  142.  
  143.  
  144. # In[59]:
  145.  
  146.  
  147. from sklearn.preprocessing import MinMaxScaler
  148. x_transformed = MinMaxScaler().fit_transform(x_broken)
  149. estimator = KNeighborsClassifier()
  150. transformed_scores = cross_val_score(esimator, x_transformed, y, scoring='accuracy')
  151. print("The average accuracy for is {0:.1f}%".format(np.mean(transformed_scores) * 100))
  152.  
  153.  
  154. # In[60]:
  155.  
  156.  
  157. from sklearn.preprocessing import MinMaxScaler
  158. x_transformed = MinMaxScaler().fit_transform(x_broken)
  159. estimator = KNeighborsClassifier()
  160. transformed_scores = cross_val_score(esimator, x_transformed, y, scoring='accuracy')
  161. print("The average accuracy for is {0:.1f}%".format(np.mean(transformed_scores) * 100))
  162.  
  163.  
  164. # In[61]:
  165.  
  166.  
  167. from sklearn.pipeline import Pipeline
  168.  
  169.  
  170. # In[62]:
  171.  
  172.  
  173. scaling_pipeline = ([
  174. ('scale', MinMaxScaler()),
  175. ('predict', KNeighborsClassifier())])
  176. print(scaling_pipeline)
  177.  
  178.  
  179. # In[63]:
  180.  
  181.  
  182. scores = cross_val_score(scaling_pipeline, x_broken, y, scoring='accuracy')
  183. print("the pipeline scored an average accuracy for is {0:.1f}%".format(np.mean(transformed_scores)* 100))
  184.  
  185.  
  186. # In[ ]:
Add Comment
Please, Sign In to add comment