Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #import math
- #import matplotlib.pyplot as plt
- #import numpy as np
- #import os
- #import pandas as pd
- #import csv
- #
- #Zad 1
- #def sigmoid(x):
- # z = math.e**(-x)
- # return 1 / (1 + z)
- #
- #
- #x_s = np.arange(-10, 10, 0.1)
- #y_s = sigmoid(x_s)
- #
- #plt.figure()
- #plt.plot(x_s, y_s, color = "blue")
- #plt.show()
- #
- #
- #
- #Zadanie 2
- #
- #
- #from sklearn.model_selection import train_test_split
- #from sklearn import metrics
- #from sklearn.linear_model import LogisticRegression
- #from sklearn import datasets
- #import seaborn as sns
- #
- #current_dir = os.path.abspath(os.path.dirname(__file__))
- #csv_path = os.path.join(current_dir, "diabetes.csv")
- #diab_data = pd.read_csv(csv_path)
- #
- #
- ##rozdzielamy dane wg nazw
- #feature_names = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
- #
- #X = diab_data[feature_names]
- #y = diab_data.Outcome
- #
- #X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
- #
- #print("{0:0.2f}% data is in training set".format((len(X_train)/len( diab_data.index)) * 100))
- #print("{0:0.2f}% data is in test set".format((len(X_test)/len( diab_data.index)) * 100))
- #
- #
- #
- #lregr=LogisticRegression()
- #lregr.fit(X_train, y_train)
- #pred_1 = lregr.predict(X_test)
- #
- ###w sumie fajna notatka
- ###A confusion matrix is a table that is used to evaluate the performance of a classification model.
- ###You can also visualize the performance of an algorithm. The fundamental of a confusion matrix is the number of correct
- ###and incorrect predictions are summed up class-wise.
- #
- #conf_matrix = metrics.confusion_matrix(y_test, pred_1)
- #class_names=[0,1]
- #fig, ax = plt.subplots()
- #tick_marks = [0,1]
- #plt.xticks(tick_marks, class_names)
- #plt.yticks(tick_marks, class_names)
- #sns.heatmap(pd.DataFrame(conf_matrix), annot=True, cmap="YlGnBu" ,fmt='g')
- #ax.xaxis.set_label_position("top")
- #plt.tight_layout()
- #plt.title('Confusion matrix', y=1.1)
- #plt.ylabel('Actual label')
- #plt.xlabel('Predicted label')
- #
- #print("Original Diabetes True Values : {0} ({1:0.2f}%)".format(len(diabetes_mod.loc[diabetes_mod['Outcome'] == 1]), (len(diabetes_mod.loc[diabetes_mod['Outcome'] == 1])/len(diabetes_mod.index)) * 100))
- #print("Original Diabetes False Values : {0} ({1:0.2f}%)".format(len(diabetes_mod.loc[diabetes_mod['Outcome'] == 0]), (len(diabetes_mod.loc[diabetes_mod['Outcome'] == 0])/len(diabetes_mod.index)) * 100))
- #print("")
- #print("Training Diabetes True Values : {0} ({1:0.2f}%)".format(len(y_train[y_train[:] == 1]), (len(y_train[y_train[:] == 1])/len(y_train)) * 100))
- #print("Training Diabetes False Values : {0} ({1:0.2f}%)".format(len(y_train[y_train[:] == 0]), (len(y_train[y_train[:] == 0])/len(y_train)) * 100))
- #print("")
- #print("Test Diabetes True Values : {0} ({1:0.2f}%)".format(len(y_test[y_test[:] == 1]), (len(y_test[y_test[:] == 1])/len(y_test)) * 100))
- #print("Test Diabetes False Values : {0} ({1:0.2f}%)".format(len(y_test[y_test[:] == 0]), (len(y_test[y_test[:] == 0])/len(y_test)) * 100))
- #print("")
- #
- #db = diabetes_mod.corr()
- #
- #def plot_corr(df, size=11):
- # corr = df.corr()
- # fig, ax = plt.subplots(figsize=(size, size))
- # ax.matshow(corr)
- # plt.xticks(range(len(corr.columns)), corr.columns)
- # plt.yticks(range(len(corr.columns)), corr.columns)
- #
- #plot_corr(db)
- #Zadanie 3
- #
- #from sklearn.datasets.samples_generator import make_blobs
- #from sklearn import svm
- #
- #X, Y = make_blobs(n_samples=50, centers=2, cluster_std=0.60)
- ## fit the model, don't regularize for illustration purposes
- #clf = svm.SVC(kernel='linear', C=1000)
- #clf.fit(X, Y)
- #
- #plt.scatter(X[:, 0], X[:, 1], c=Y, s=30, cmap=plt.cm.Paired)
- #
- ## plot the decision function
- #ax = plt.gca()
- #xlim = ax.get_xlim()
- #ylim = ax.get_ylim()
- #
- ## create grid to evaluate model
- #xx = np.linspace(xlim[0], xlim[1], 30)
- #yy = np.linspace(ylim[0], ylim[1], 30)
- #YY, XX = np.meshgrid(yy, xx)
- #xy = np.vstack([XX.ravel(), YY.ravel()]).T
- #Z = clf.decision_function(xy).reshape(XX.shape)
- #
- ## plot decision boundary and margins
- #ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
- # linestyles=['--', '-', '--'])
- ## plot support vectors
- #ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
- # linewidth=1, facecolors='none', edgecolors='k')
- #plt.show()
- # Zadanie 4
- #
- #
- #import pandas as pd
- #from sklearn.linear_model import LogisticRegression
- #from sklearn.model_selection import train_test_split
- #from sklearn import svm
- #
- #iris = pd.read_csv("iris.csv")
- #
- #Y = iris.drop("Class",axis=1)
- #X = ['Sepal.length','sepal.width','petal.length','petal.width','variety']
- #
- #
- #
- #
- ## fit the model, don't regularize for illustration purposes
- #clf = svm.SVC(kernel='linear', C=1000)
- #clf.fit(X, Y)
- #
- #plt.scatter(X[:, 0], X[:, 1], c=Y, s=30, cmap=plt.cm.Paired)
- #
- ## plot the decision function
- #ax = plt.gca()
- #xlim = ax.get_xlim()
- #ylim = ax.get_ylim()
- #
- ## create grid to evaluate model
- #xx = np.linspace(xlim[0], xlim[1], 30)
- #yy = np.linspace(ylim[0], ylim[1], 30)
- #YY, XX = np.meshgrid(yy, xx)
- #xy = np.vstack([XX.ravel(), YY.ravel()]).T
- #Z = clf.decision_function(xy).reshape(XX.shape)
- #
- ## plot decision boundary and margins
- #ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
- # linestyles=['--', '-', '--'])
- ## plot support vectors
- #ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
- # linewidth=1, facecolors='none', edgecolors='k')
- #plt.show()
- #1) Wczytaj plik iris.csv (pandas)
- #2) Podziel dataframe na 2 czesci. W pierwszej losowo 100 przypadkow,
- #w drugiej losowo 50 przypadkow
- #3) Naucz klasyfikator SVM na 100 przypadkach
- #4) Sprawdz dzialanie na 50 przypadkach (policz, ile przypadkow zostalo
- # prawidlowo sklasyfikowanych)
- import numpy as np
- import pandas as pd
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import classification_report
- from sklearn import svm
- from sklearn.metrics import accuracy_score
# Zadanie 4: train an SVM classifier on the iris data set and report
# per-class metrics plus overall accuracy on a held-out test split.
data = pd.read_csv('./iris.csv')

# Feature matrix: the four numeric measurements; target: the species label.
x = data[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
# .to_numpy() yields the same flat 1-D label array that the original
# np.ravel(data[['variety']], order="C") produced, without the detour
# through a single-column DataFrame.
y = data['variety'].to_numpy()

# random_state pins the split so the printed accuracy is reproducible
# between runs (same convention as the diabetes split earlier in the file).
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=0
)

# RBF-kernel SVM; gamma="scale" uses the 1 / (n_features * X.var()) heuristic.
klasyfikator = svm.SVC(kernel='rbf', gamma="scale")
klasyfikator.fit(X_train, y_train)
y_predicted = klasyfikator.predict(X_test)

# Per-class precision/recall/F1, then the overall accuracy as a percentage.
print(classification_report(y_test, y_predicted))
accuracy = accuracy_score(y_test, y_predicted)
print("accuracy = ", accuracy * 100, "%")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement