# Test/Train Split
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm

iris = datasets.load_iris()
print(iris.data.shape, iris.target.shape)

# Quickly sample a training set while holding out 40% of the data for
# testing our classifier
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

clf = svm.SVC(kernel='linear', C=1).fit(X_train, y_train)
print(clf.score(X_test, y_test))
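# A single split's score can be noisy. As a sketch (not part of the original
# paste), cross_val_score averages accuracy over several folds instead:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(svm.SVC(kernel='linear', C=1), iris.data, iris.target, cv=5)
print("mean CV accuracy: %.3f (+/- %.3f)" % (scores.mean(), scores.std()))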
#--------------
# Linear Regression
# Code source: Jaques Grobler
# License: BSD 3 clause
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load the diabetes dataset
diabetes = datasets.load_diabetes()

# Use only one feature
diabetes_X = diabetes.data[:, np.newaxis, 2]

# Split the data into training/testing sets
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets into training/testing sets
diabetes_y_train = diabetes.target[:-20]
diabetes_y_test = diabetes.target[-20:]

# Create a linear regression object
regr = linear_model.LinearRegression()

# Train the model using the training sets
regr.fit(diabetes_X_train, diabetes_y_train)

# Make predictions using the testing set
diabetes_y_pred = regr.predict(diabetes_X_test)

# The coefficients
print('Coefficients: \n', regr.coef_)
# The mean squared error
print("Mean squared error: %.2f"
      % mean_squared_error(diabetes_y_test, diabetes_y_pred))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % r2_score(diabetes_y_test, diabetes_y_pred))

# Plot outputs
plt.scatter(diabetes_X_test, diabetes_y_test, color='black')
plt.plot(diabetes_X_test, diabetes_y_pred, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
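# The [-20:] slicing above is a manual hold-out on a single feature. As a
# sketch (not part of the original paste), train_test_split makes a
# comparable split, here keeping all ten features:
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(
    diabetes.data, diabetes.target, test_size=20, random_state=0)
regr_full = linear_model.LinearRegression().fit(X_tr, y_tr)
print('R^2 with all features: %.2f' % regr_full.score(X_te, y_te))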
#--------------
# KNN
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn import neighbors, datasets

n_neighbors = 15

# import some data to play with
iris = datasets.load_iris()

# we only take the first two features; we could avoid this ugly
# slicing by using a two-dim dataset
X = iris.data[:, :2]
y = iris.target

h = .02  # step size in the mesh

# Create color maps
cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])

for weights in ['uniform', 'distance']:
    # we create an instance of the neighbors classifier and fit the data
    clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    clf.fit(X, y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max] x [y_min, y_max].
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold,
                edgecolor='k', s=20)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.title("3-Class classification (k = %i, weights = '%s')"
              % (n_neighbors, weights))

plt.show()
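# k = 15 is hard-coded above. As a sketch (not part of the original paste),
# cross-validated accuracy over a small grid is a simple way to choose k:
from sklearn.model_selection import cross_val_score
for k in (1, 5, 15, 31):
    acc = cross_val_score(neighbors.KNeighborsClassifier(n_neighbors=k), X, y, cv=5).mean()
    print("k = %2d: mean CV accuracy = %.3f" % (k, acc))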
#--------------
# SVM
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

# we create 40 separable points
np.random.seed(0)
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20

# fit the model
clf = svm.SVC(kernel='linear')
clf.fit(X, Y)

# get the separating hyperplane
w = clf.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(-5, 5)
yy = a * xx - (clf.intercept_[0]) / w[1]

# plot the parallels to the separating hyperplane that pass through the
# support vectors
b = clf.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = clf.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])

# plot the line, the points, and the nearest vectors to the plane
plt.plot(xx, yy, 'k-')
plt.plot(xx, yy_down, 'k--')
plt.plot(xx, yy_up, 'k--')
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=80, facecolors='none')
plt.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired)
plt.axis('tight')
plt.show()
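# For a linear SVM the geometric margin half-width is 1 / ||w||. As a sketch
# (not part of the original paste), it can be read off the fitted model:
margin = 1 / np.linalg.norm(clf.coef_[0])
print('margin half-width: %.3f' % margin)
print('support vectors per class:', clf.n_support_)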
#--------------
# ROC Curve
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier

# Import some data to play with
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Binarize the output
y = label_binarize(y, classes=[0, 1, 2])
n_classes = y.shape[1]

# Add noisy features to make the problem harder
random_state = np.random.RandomState(0)
n_samples, n_features = X.shape
X = np.c_[X, random_state.randn(n_samples, 200 * n_features)]

# Shuffle and split training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5,
                                                    random_state=0)

# Learn to predict each class against the others
classifier = OneVsRestClassifier(svm.SVC(kernel='linear', probability=True,
                                         random_state=random_state))
y_score = classifier.fit(X_train, y_train).decision_function(X_test)

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Compute micro-average ROC curve and ROC area
fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel())
roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

# Plot the ROC curve for class 2
plt.figure()
lw = 2
plt.plot(fpr[2], tpr[2], color='darkorange',
         lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[2])
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic example')
plt.legend(loc="lower right")
plt.show()
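# As a cross-check (not part of the original paste), roc_auc_score computes
# the same micro-averaged area in a single call:
from sklearn.metrics import roc_auc_score
print('micro-average AUC: %.2f' % roc_auc_score(y_test, y_score, average='micro'))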
#--------------
# PCA
# Code source: Gaël Varoquaux
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn import datasets

np.random.seed(5)

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Project the four iris features onto the two leading principal components
plt.figure(1, figsize=(4, 3))
pca = decomposition.PCA(n_components=2)
pca.fit(X)
X = pca.transform(X)
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.show()
# setosa, versicolor, and virginica in purple, green, and yellow
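# explained_variance_ratio_ (not shown in the original paste) reports the
# fraction of total variance each retained component captures; for iris the
# first two components cover roughly 98% of it:
print(pca.explained_variance_ratio_)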
#--------------