CamolaZ

2021-05-12 15.20.04 Workshop Machine Learning - Classification 88412401897

Jun 10th, 2021
15:19:53 From João Correia To Everyone:
    https://colab.research.google.com/
15:33:28 From João Correia To Everyone:
    import sys
    import os
    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.model_selection import KFold
    import seaborn as sns
    from sklearn.metrics import f1_score
    from sklearn.metrics import matthews_corrcoef
    from IPython.display import display
    from matplotlib.pyplot import figure
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import cross_val_score
    from sklearn.model_selection import GridSearchCV
    from sklearn.naive_bayes import GaussianNB
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.ensemble import RandomForestClassifier
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    from keras.wrappers.scikit_learn import KerasClassifier
    from sklearn.metrics import roc_curve, roc_auc_score
15:33:35 From João Correia To Everyone:
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error
    from sklearn import datasets
    from sklearn.svm import SVC
    from sklearn.model_selection import train_test_split
16:05:04 From João Correia To Everyone:
    # Load a simple toy dataset
    data = datasets.load_breast_cancer()

    X = data.data
    y = data.target
    print(X[0])
    print(y[0])
    print(data.target_names[y[0]])
    #
    lista_benigno = [exemplo for exemplo in range(len(y)) if y[exemplo] == 1]
    print(lista_benigno)
    x_benigno = X[lista_benigno]
    y_benigno = y[lista_benigno]
    print(len(x_benigno))
    #
    lista_maligno = [exemplo for exemplo in range(len(y)) if y[exemplo] == 0]
    print(lista_maligno)
    x_maligno = X[lista_maligno]
    y_maligno = y[lista_maligno]
    print(len(x_maligno))
    print(len(X))
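Aside: the per-class split above builds index lists with a comprehension; the same result can be obtained more directly with NumPy boolean masks. A minimal sketch, not part of the original chat:

    # select benign (y == 1) and malignant (y == 0) rows via boolean indexing
    x_benigno, y_benigno = X[y == 1], y[y == 1]
    x_maligno, y_maligno = X[y == 0], y[y == 0]
    print(len(x_benigno), len(x_maligno), len(X))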
16:05:42 From João Correia To Everyone:
    def model_train_and_evaluate(a_classifier, _x_train, _y_train, _x_test, _y_test, key=""):

        # fit -> train
        a_model = a_classifier.fit(_x_train, _y_train)

        # test
        yproba = a_model.predict_proba(_x_test)[::,1]
        ypredicted = a_model.predict(_x_test)

        # extract metrics, typically (y_true, y_pred)
        fpr, tpr, _ = roc_curve(_y_test, yproba)
        auc = roc_auc_score(_y_test, yproba)
        conf_m = confusion_matrix(_y_test, ypredicted)
        mcc = matthews_corrcoef(_y_test, ypredicted)
        f1 = f1_score(_y_test, ypredicted)
16:05:54 From João Correia To Everyone:
        # we can add more...
        result_table = pd.DataFrame(columns=['classifiers', 'fpr', 'tpr', 'auc', 'f1', 'mcc', 'confusion_matrix'])
        result_table = result_table.append({'classifiers': a_classifier.__class__.__name__ + "-" + key,
                                            'fpr': fpr,
                                            'tpr': tpr,
                                            'auc': auc,
                                            'confusion_matrix': conf_m,
                                            'mcc': mcc,
                                            'f1': f1}, ignore_index=True)
        return result_table
16:06:02 From João Correia To Everyone:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30, random_state=1234)
    print(X_train.shape)

    results = model_train_and_evaluate(DecisionTreeClassifier(random_state=2020), X_train, y_train, X_test, y_test)
    results.head()
16:06:40 From João Correia To Everyone:
    https://colab.research.google.com/drive/1BS28rnY7B4R0ejidxJeqxQ2e2FAnLCCf#scrollTo=bZqZpwbgIBIa&line=22&uniqifier=1
16:08:18 From Dylan Perdigão | 2018233092 To Everyone:
    It worked for me
16:11:49 From João Correia To Everyone:
    def plot_roc_insame(result_table):
        result_table.set_index('classifiers', inplace=True)

        fig = plt.figure(figsize=(8,6))

        for i in result_table.index:
            plt.plot(result_table.loc[i]['fpr'],
                     result_table.loc[i]['tpr'],
                     label="{}, AUC={:.3f}".format(i, result_table.loc[i]['auc']))

        plt.plot([0,1], [0,1], color='black', linestyle='--')  # random chance!
        plt.xticks(np.arange(0.0, 1.1, step=0.1))
        plt.xlabel("False Positive Rate", fontsize=15)
        plt.yticks(np.arange(0.0, 1.1, step=0.1))
        plt.ylabel("True Positive Rate", fontsize=15)
        plt.title('ROC Curve Analysis', fontweight='bold', fontsize=15)
        plt.legend(prop={'size':13}, loc='lower right')
        plt.show()
16:15:03 From João Correia To Everyone:
    # create a set of other models
    classifiers = [GaussianNB(),
                   KNeighborsClassifier(n_neighbors=3),
                   DecisionTreeClassifier(random_state=2020),
                   RandomForestClassifier(random_state=2020),
                   SVC(kernel="linear", C=0.025, probability=True)
                  ]

    results = pd.DataFrame(columns=['classifiers', 'fpr','tpr','auc','f1', 'mcc', 'confusion_matrix'])

    # train!
    for cls in classifiers:
        results = results.append(model_train_and_evaluate(cls, X_train, y_train, X_test, y_test))

    results.head()
    print(results)
    plot_roc_insame(results)
16:20:30 From João Correia To Everyone:
    --------------------- change so that more than one classifier of the same type can appear in the list
16:20:30 From João Correia To Everyone:
    # create a set of other models
    classifiers = [GaussianNB(),
                   KNeighborsClassifier(n_neighbors=3),
                   KNeighborsClassifier(n_neighbors=5),
                   RandomForestClassifier(random_state=2020),
                   SVC(kernel="linear", C=0.025, probability=True),
                   SVC(kernel="linear", C=0.01, probability=True)
                  ]

    results = pd.DataFrame(columns=['classifiers', 'fpr','tpr','auc','f1', 'mcc', 'confusion_matrix'])

    # train! pass the index as a key so repeated classifier names stay distinct
    i = 0
    for cls in classifiers:
        results = results.append(model_train_and_evaluate(cls, X_train, y_train, X_test, y_test, str(i)))
        i += 1

    results.head()
    print(results)
    plot_roc_insame(results)
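Aside: cross_val_score is imported at the top of the session but never used in the chat; a k-fold comparison of the same classifier list could look like the sketch below (not part of the original session, using the full X and y loaded earlier):

    # 5-fold cross-validated F1 score for each classifier
    for cls in classifiers:
        scores = cross_val_score(cls, X, y, cv=5, scoring='f1')
        print(cls.__class__.__name__, round(scores.mean(), 3))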
16:22:56 From João Correia To Everyone:
    https://keras.io/api/#models-api
16:30:18 From João Correia To Everyone:
    def Deep_Model(activation='linear', neurons=25, optimizer='Adam', input_size=30):
        model = Sequential()
        # fully connected #1
        model.add(Dense(neurons, input_dim=input_size, activation=activation))
        # fully connected #2
        model.add(Dense(neurons, activation=activation))
        # dropout layer
        model.add(Dropout(0.3))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        return model


    # ensure the dataset!
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30, random_state=1234)

    # create ref to keras model
    clf = KerasClassifier(build_fn=Deep_Model, epochs=100, batch_size=40, verbose=1)
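Note: keras.wrappers.scikit_learn has since been removed from recent Keras/TensorFlow releases. If the import used above fails, the maintained replacement is the scikeras package, which offers an equivalent wrapper; a minimal sketch, assuming scikeras is installed:

    # pip install scikeras
    from scikeras.wrappers import KerasClassifier
    clf = KerasClassifier(model=Deep_Model, epochs=100, batch_size=40, verbose=1)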
16:30:41 From João Correia To Everyone:
    # train
    clf.fit(X_train, y_train)

    # test on samples! In this case the whole test dataset
    y_pred = clf.predict(X_test)

    # get probabilities
    yproba = clf.predict_proba(X_test)[::,1]

    # extract metrics, typically (y_true, y_pred)
    fpr, tpr, _ = roc_curve(y_test, yproba)
    auc = roc_auc_score(y_test, yproba)
    conf_m = confusion_matrix(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    result_table = pd.DataFrame(columns=['classifiers', 'fpr','tpr','auc','f1', 'mcc', 'confusion_matrix'])
    result_table = result_table.append({'classifiers': "DNN",
                                        'fpr': fpr,
                                        'tpr': tpr,
                                        'auc': auc,
                                        'confusion_matrix': conf_m,
                                        'mcc': mcc,
                                        'f1': f1}, ignore_index=True)

    print('AUC', auc)
    print(classification_report(y_test, y_pred))
    print(conf_m)
16:30:49 From João Correia To Everyone:
    # plot roc curve!
    plot_roc_insame(result_table)

    result_table.head()
16:42:29 From João Correia To Everyone:
    try:
        import pickle
        with open('/content/drive/My Drive/Workshop-SB-IEEE/cifar10.pickle', 'rb') as f:
            original_train_images, train_labels, original_test_images, test_labels = pickle.load(f)
    except:
        from tensorflow.keras.datasets import cifar10
        (original_train_images, train_labels), (original_test_images, test_labels) = cifar10.load_data()
16:42:43 From João Correia To Everyone:
    import matplotlib.pyplot as plt

    class_names = ['airplane','automobile','bird','cat','deer',
                   'dog','frog','horse','ship','truck']

    plt.figure(figsize=(7, 7))
    for i in range(16):
        ax = plt.subplot(4, 4, i + 1)
        plt.imshow(original_train_images[i])
        plt.title(class_names[int(train_labels[i])])
        plt.axis("off")
16:42:52 From João Correia To Everyone:
    train_images = original_train_images / 255.0
    test_images = original_test_images / 255.0
16:43:00 From João Correia To Everyone:
    from tensorflow.keras import layers, models

    model = models.Sequential()

    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.Dropout(0.5))

    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(10))
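Aside: to inspect the layer output shapes and parameter counts before training, the model summary can be printed at this point (not shown in the original chat):

    model.summary()  # prints one row per layer with its output shape and parameter count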
16:43:10 From João Correia To Everyone:
    from tensorflow.keras.losses import SparseCategoricalCrossentropy

    model.compile(optimizer='adam',
                  loss=SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    # train!
    history = model.fit(train_images, train_labels, epochs=20, validation_split=0.2)
16:46:24 From João Correia To Everyone:
    plt.figure(figsize=(15, 6))

    plt.subplot(1,2,1)
    plt.plot(history.history['accuracy'], label='Accuracy')
    plt.plot(history.history['val_accuracy'], label='Val. Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.ylim([0.4, 1])
    plt.legend(loc='upper right')

    plt.subplot(1,2,2)
    plt.plot(history.history['loss'], label='Loss')
    plt.plot(history.history['val_loss'], label='Val. Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.ylim([0.6, 1.6])
    plt.legend(loc='upper right')
16:46:31 From João Correia To Everyone:
    test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
16:46:38 From João Correia To Everyone:
    print(test_images[0:1].shape)  # give it a shape the model accepts!
    print(model.predict(test_images[0:1]))  # testing only one!
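Aside: since the final Dense(10) layer has no activation, these predictions are raw logits; a small sketch (not from the original session) of turning them into a readable class label:

    import numpy as np
    import tensorflow as tf

    logits = model.predict(test_images[0:1])          # shape (1, 10), raw logits
    probs = tf.nn.softmax(logits, axis=-1).numpy()    # convert logits to probabilities
    print(class_names[int(np.argmax(probs))])         # predicted class name
    print(class_names[int(test_labels[0])])           # ground-truth class name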
16:56:43 From Catarina Rocha To Everyone:
    Thank you very much, this was a very useful session, but I have to leave
16:57:12 From Ismael Jesus To José(Privately):
    Hi José, could you tell me the name you registered with, so I can send you the certificate of participation afterwards?
16:58:42 From Ismael Jesus To José(Privately):
    If you haven't registered, I can send you the registration link