# AIS6
# lancernik, May 14th, 2019
# import math
# import matplotlib.pyplot as plt
# import numpy as np
# import os
# import pandas as pd
# import csv

# Exercise 1: plot the sigmoid function 1 / (1 + e^(-x))
# def sigmoid(x):
#     z = math.e ** (-x)
#     return 1 / (1 + z)
#
# x_s = np.arange(-10, 10, 0.1)
# y_s = sigmoid(x_s)
#
# plt.figure()
# plt.plot(x_s, y_s, color="blue")
# plt.show()
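# A quick sanity check for the sigmoid (my own sketch, kept disabled like the
# draft above): sigmoid(0) should be exactly 0.5, and every output should lie
# strictly between 0 and 1; np.exp is the array-friendly form of math.e ** x.
#
# assert sigmoid(0) == 0.5
# vals = 1 / (1 + np.exp(-np.arange(-10, 10, 0.1)))
# assert ((vals > 0) & (vals < 1)).all()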

# Exercise 2: logistic regression on the diabetes.csv dataset

# from sklearn.model_selection import train_test_split
# from sklearn import metrics
# from sklearn.linear_model import LogisticRegression
# import seaborn as sns
#
# # load diabetes.csv from this script's own directory
# current_dir = os.path.abspath(os.path.dirname(__file__))
# csv_path = os.path.join(current_dir, "diabetes.csv")
# diab_data = pd.read_csv(csv_path)
#
# # split the data into features and target by column name
# feature_names = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']
#
# X = diab_data[feature_names]
# y = diab_data.Outcome
#
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
#
# print("{0:0.2f}% of the data is in the training set".format((len(X_train) / len(diab_data.index)) * 100))
# print("{0:0.2f}% of the data is in the test set".format((len(X_test) / len(diab_data.index)) * 100))
#
# lregr = LogisticRegression()
# lregr.fit(X_train, y_train)
# pred_1 = lregr.predict(X_test)
#
# ## quite a nice note, actually:
# ## A confusion matrix is a table used to evaluate the performance of a
# ## classification model, and the performance can also be visualized. The
# ## core idea is that the numbers of correct and incorrect predictions are
# ## summed up class-wise.
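# As a small illustration of that note (my own sketch, kept disabled like the
# rest of this draft): rows of the matrix are actual classes, columns are
# predicted classes, so the diagonal holds the correctly classified counts.
#
# from sklearn import metrics
# y_true = [0, 0, 1, 1, 1]
# y_pred = [0, 1, 1, 1, 0]
# print(metrics.confusion_matrix(y_true, y_pred))
# # [[1 1]    1 true negative,  1 false positive
# #  [1 2]]   1 false negative, 2 true positives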
#
# conf_matrix = metrics.confusion_matrix(y_test, pred_1)
# class_names = [0, 1]
# fig, ax = plt.subplots()
# tick_marks = [0, 1]
# plt.xticks(tick_marks, class_names)
# plt.yticks(tick_marks, class_names)
# sns.heatmap(pd.DataFrame(conf_matrix), annot=True, cmap="YlGnBu", fmt='g')
# ax.xaxis.set_label_position("top")
# plt.tight_layout()
# plt.title('Confusion matrix', y=1.1)
# plt.ylabel('Actual label')
# plt.xlabel('Predicted label')
#
# print("Original Diabetes True Values : {0} ({1:0.2f}%)".format(len(diab_data.loc[diab_data['Outcome'] == 1]), (len(diab_data.loc[diab_data['Outcome'] == 1]) / len(diab_data.index)) * 100))
# print("Original Diabetes False Values : {0} ({1:0.2f}%)".format(len(diab_data.loc[diab_data['Outcome'] == 0]), (len(diab_data.loc[diab_data['Outcome'] == 0]) / len(diab_data.index)) * 100))
# print("")
# print("Training Diabetes True Values : {0} ({1:0.2f}%)".format(len(y_train[y_train == 1]), (len(y_train[y_train == 1]) / len(y_train)) * 100))
# print("Training Diabetes False Values : {0} ({1:0.2f}%)".format(len(y_train[y_train == 0]), (len(y_train[y_train == 0]) / len(y_train)) * 100))
# print("")
# print("Test Diabetes True Values : {0} ({1:0.2f}%)".format(len(y_test[y_test == 1]), (len(y_test[y_test == 1]) / len(y_test)) * 100))
# print("Test Diabetes False Values : {0} ({1:0.2f}%)".format(len(y_test[y_test == 0]), (len(y_test[y_test == 0]) / len(y_test)) * 100))
# print("")
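# These balance checks suggest a related option (my own note, not part of the
# exercise): train_test_split can preserve the class ratio in both splits
# directly via its stratify parameter:
#
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.25, random_state=0, stratify=y)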
#
# # plot the feature correlation matrix as a colour grid, labelled by column;
# # plot_corr computes df.corr() itself, so it takes the raw dataframe
# def plot_corr(df, size=11):
#     corr = df.corr()
#     fig, ax = plt.subplots(figsize=(size, size))
#     ax.matshow(corr)
#     plt.xticks(range(len(corr.columns)), corr.columns)
#     plt.yticks(range(len(corr.columns)), corr.columns)
#
# plot_corr(diab_data)


# Exercise 3: linear SVM decision boundary on synthetic blobs

# from sklearn.datasets import make_blobs
# from sklearn import svm
#
# X, Y = make_blobs(n_samples=50, centers=2, cluster_std=0.60)
# # fit the model; the large C means almost no regularization, for illustration
# clf = svm.SVC(kernel='linear', C=1000)
# clf.fit(X, Y)
#
# plt.scatter(X[:, 0], X[:, 1], c=Y, s=30, cmap=plt.cm.Paired)
#
# # plot the decision function
# ax = plt.gca()
# xlim = ax.get_xlim()
# ylim = ax.get_ylim()
#
# # create a grid to evaluate the model on
# xx = np.linspace(xlim[0], xlim[1], 30)
# yy = np.linspace(ylim[0], ylim[1], 30)
# YY, XX = np.meshgrid(yy, xx)
# xy = np.vstack([XX.ravel(), YY.ravel()]).T
# Z = clf.decision_function(xy).reshape(XX.shape)
#
# # plot the decision boundary (level 0) and the margins (levels -1 and 1)
# ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
#            linestyles=['--', '-', '--'])
# # plot the support vectors
# ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
#            linewidth=1, facecolors='none', edgecolors='k')
# plt.show()
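# A side note on those contour levels (my own sketch continuing the disabled
# snippet above, not part of the exercise): for a linear SVM the levels -1
# and 1 are the margin edges, and the geometric margin width is 2 / ||w||,
# readable from the fitted coefficients:
#
# w = clf.coef_[0]
# print("margin width:", 2 / np.linalg.norm(w))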


# Exercise 4 (draft; superseded by the working solution at the bottom)

# import pandas as pd
# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import train_test_split
# from sklearn import svm
#
# iris = pd.read_csv("iris.csv")
#
# X = iris[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
# Y = iris['variety']
#
# # fit the model; the large C means almost no regularization, for illustration
# clf = svm.SVC(kernel='linear', C=1000)
# clf.fit(X, Y)
#
# # NOTE: the plotting below is carried over from Exercise 3 and assumes a
# # 2-feature, 2-class problem; iris has 4 features and 3 classes, which is
# # presumably why this draft was abandoned for the version at the bottom
# plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=Y, s=30, cmap=plt.cm.Paired)
#
# # plot the decision function
# ax = plt.gca()
# xlim = ax.get_xlim()
# ylim = ax.get_ylim()
#
# # create a grid to evaluate the model on
# xx = np.linspace(xlim[0], xlim[1], 30)
# yy = np.linspace(ylim[0], ylim[1], 30)
# YY, XX = np.meshgrid(yy, xx)
# xy = np.vstack([XX.ravel(), YY.ravel()]).T
# Z = clf.decision_function(xy).reshape(XX.shape)
#
# # plot the decision boundary and margins
# ax.contour(XX, YY, Z, colors='k', levels=[-1, 0, 1], alpha=0.5,
#            linestyles=['--', '-', '--'])
# # plot the support vectors
# ax.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=100,
#            linewidth=1, facecolors='none', edgecolors='k')
# plt.show()


# 1) Load the iris.csv file (pandas)
# 2) Split the dataframe into 2 parts: the first with 100 randomly chosen
#    cases, the second with 50 randomly chosen cases
# 3) Train an SVM classifier on the 100 cases
# 4) Check how it performs on the 50 cases (count how many cases were
#    classified correctly)

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn import svm

data = pd.read_csv('./iris.csv')

# features and class labels (ravel flattens the one-column frame to a vector)
x = data[['sepal.length', 'sepal.width', 'petal.length', 'petal.width']]
y = np.ravel(data[['variety']], order="C")

# 0.33 of the 150 iris rows rounds up to 50 test cases, leaving 100 for training
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33)

# train an RBF-kernel SVM on the training cases
klasyfikator = svm.SVC(kernel='rbf', gamma="scale")
klasyfikator.fit(X_train, y_train)
y_predicted = klasyfikator.predict(X_test)

# per-class precision/recall/F1 on the held-out cases
print(classification_report(y_test, y_predicted))

accuracy = accuracy_score(y_test, y_predicted)
print("accuracy = ", accuracy * 100, "%")
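
# The task asks how many of the 50 cases were classified correctly; as an
# explicit count (my own addition), accuracy is just this count divided by
# the number of test cases:
n_correct = int((y_predicted == y_test).sum())
print(n_correct, "of", len(y_test), "test cases classified correctly")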