# Train/test split
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

iris = datasets.load_iris()
print(iris.data.shape, iris.target.shape)

# We can quickly sample a training set while holding out 40% of the data for
# testing the classifier
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
print(clf.score(X_test, y_test))

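# A quick extension (a sketch, not part of the original paste): a single split
# can be noisy, so 5-fold cross-validation on the same estimator gives a
# steadier accuracy estimate.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, iris.data, iris.target, cv=5)
print("CV accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
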
#--------------
# Linear regression
# Code source: Jaques Grobler
# License: BSD 3 clause

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes = datasets.load_diabetes()

# Use only one feature
diabetes_X = diabetes.data[:, np.newaxis, 2]

# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets into training/testing sets
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create a linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))

# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)

plt.xticks(())
plt.yticks(())

plt.show()

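# A small follow-up (sketch, using the fitted regr above): the intercept
# completes the line y = coef_ * x + intercept_, and predict() applies it to a
# hypothetical feature value chosen here just for illustration.
print('Intercept: %.2f' % regr.intercept_)
print('Prediction at x = 0.05: %.2f' % regr.predict(np.array([[0.05]]))[0])
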
#--------------
# KNN

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 15

# import some data to play with
iris = datasets.load_iris()

# We only take the first two features. We could avoid this ugly
# slicing by using a two-dimensional dataset.
X = iris.data[:, :2]
y = iris.target

h = .02  # step size in the mesh

# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

for weights in ['uniform', 'distance']:
    # we create an instance of the neighbors classifier and fit the data
    clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold,
                edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()

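# Illustrative use (sketch): after the loop, clf holds the distance-weighted
# model; the sample below is a hypothetical (sepal length, sepal width) pair.
sample = np.array([[5.0, 3.5]])
print('Predicted class:', iris.target_names[clf.predict(sample)[0]])
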
#--------------
# SVM

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# we create 40 separable points
np.random.seed(0)
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20

# fit the model
clf = svm.SVC(kernel='linear')
clf.fit(X, Y)

# get the separating hyperplane
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (clf.intercept_[0]) / w[1]

# plot the parallels to the separating hyperplane that pass through the
# support vectors
b = clf.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = clf.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])

# plot the line, the points, and the nearest vectors to the plane
plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')

plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=80, facecolors='none')
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)

plt.axis('tight')
plt.show()
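
# Geometry check (sketch, from the fitted coefficients above): for a linear
# SVM the margin width is 2 / ||w||.
margin = 2 / np.linalg.norm(w)
print('Margin width: %.3f' % margin)
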
#--------------
# ROC curve

import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

# Import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Binarize the output
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]

# Add noisy features to make the problem harder
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# Shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                    random_state=0)

# Learn to predict each class against the others
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,
                                         random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute the ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute the micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

plt.figure()
lw = 2
plt.plot(fpr[2], tpr[2], color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[2])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()

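# The micro-average computed above is never reported; printing it is a cheap
# sanity check alongside the per-class curve that gets plotted.
print('Micro-average ROC AUC: %0.2f' % roc_auc["micro"])
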
#--------------
# PCA
# Code source: Gaël Varoquaux
# License: BSD 3 clause

import numpy as np
import matplotlib.pyplot as plt

from sklearn import decomposition
from sklearn import datasets

np.random.seed(5)

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Project the four-dimensional iris data onto its first two principal components
plt.figure(figsize=(4, 3))
pca = decomposition.PCA(n_components=2)
pca.fit(X)
X = pca.transform(X)

plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()
# setosa, versicolor and virginica in purple, green and yellow
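
# A useful diagnostic (sketch, using the fitted pca above): how much of the
# total variance the two retained components capture.
print('Explained variance ratio:', pca.explained_variance_ratio_)
print('Total: %.2f' % pca.explained_variance_ratio_.sum())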