Advertisement
Not a member of Pastebin yet?
Sign up — it unlocks many cool features!
- import sys
- import time
- from sklearn.model_selection import train_test_split
- from argparse import ArgumentParser
- sys.path.append('/home/tiberiu/tema2_ml/libsvm-master/python')
- from svmutil import *
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt;
- plt.rcdefaults()
- import numpy as np
- import matplotlib.pyplot as plt
- import decimal
- import numpy as np
- import matplotlib.pyplot as plt
- from sklearn import svm, datasets
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import confusion_matrix
- from sklearn.utils.multiclass import unique_labels
- from copy import deepcopy
- lala = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
def plot_confusion_matrix(strategy, y_true, y_pred, classes,kernel_param,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.

    The figure is saved to charts/<strategy><kernel_param>.png (nothing is
    shown interactively) and the matplotlib Axes is returned.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'
    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    #classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        # Row-normalize: each cell becomes a fraction of its true-label row.
        # NOTE(review): a row that sums to 0 would divide by zero here.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    fig, ax = plt.subplots(figsize=(55, 55))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")
    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            # Choose text color for contrast against the cell background.
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    # Save to disk and close the figure so repeated grid-search calls
    # don't accumulate open matplotlib figures.
    plt.savefig('charts/' + strategy + kernel_param + '.png')
    plt.close(fig)
    return ax
def drange(x, y, jump):
    """Yield floats from x up to and including y, stepping by jump.

    Accumulates by repeated addition, so float rounding drift matches the
    original accumulate-and-compare behavior at the upper boundary.
    """
    current = x
    while not (current > y):
        yield float(current)
        current = current + jump
def get_args():
    """Parse the command-line options for the SVM grid search.

    Returns a flat 12-tuple: (data_set, kernel_type, degree_start,
    degree_end, degree_step, gamma_start, gamma_end, gamma_step,
    coef_start, coef_end, coef_step, multiple_classification).
    """
    parser = ArgumentParser()
    parser.add_argument("--data_set", type=str, default='skin_nonskin',
                        help="Data set.")
    parser.add_argument("--kernel_type", type=str, default=' -t 2 ',
                        help="Kernel Type.")
    parser.add_argument("--degree_start", type=int, default=1,
                        help="Degree Start")
    parser.add_argument("--degree_end", type=int, default=100,
                        help="Degree end")
    parser.add_argument("--degree_step", type=int, default=20,
                        help="Degree step")
    parser.add_argument("--gamma_start", type=float, default=0.1,
                        help="Gamma start")
    parser.add_argument("--gamma_end", type=float, default=5,
                        help="Gamma start")
    parser.add_argument("--gamma_step", type=float, default=0.9,
                        help="Gamma start")
    parser.add_argument("--coef_start", type=float, default=0,
                        help="Coef start")
    parser.add_argument("--coef_end", type=float, default=100,
                        help="Coef end")
    parser.add_argument("--coef_step", type=float, default=20,
                        help="Coef0")
    parser.add_argument("--type_of_multiple", type=str, default='one_vs_one',
                        help='Type of multiple classification')
    args = parser.parse_args()
    # Return straight from the namespace — no intermediate locals needed.
    return (args.data_set, args.kernel_type, args.degree_start,
            args.degree_end, args.degree_step, args.gamma_start,
            args.gamma_end, args.gamma_step, args.coef_start,
            args.coef_end, args.coef_step, args.type_of_multiple)
def get_data_set(i, train):
    """Relabel `train` for a one-vs-all scheme.

    Labels equal to class `i` are kept (coerced to int); every other
    label is replaced with 0. Returns a new list; `train` is untouched.
    """
    target = int(i)
    new_labels = []
    for label in train:
        new_labels.append(int(label) if target == label else 0)
    return new_labels
def get_error(y_test, p_label):
    """Return the misclassification rate between true and predicted labels.

    Fixes a ZeroDivisionError the original raised on empty inputs (now
    returns 0.0) and replaces the manual counting loop with sum/zip.
    The length-mismatch assertion is kept so callers see the same
    exception type as before.
    """
    assert len(y_test) == len(p_label), 'Not same size wut'
    if not y_test:
        # No samples: define the error rate as 0 instead of dividing by zero.
        return 0.0
    mismatches = sum(truth != pred for truth, pred in zip(y_test, p_label))
    return mismatches / len(y_test)
def main():
    """Grid-search SVM hyper-parameters (gamma, coef0, degree) with libsvm.

    For each grid point a model is trained and evaluated; a normalized
    confusion matrix is saved per run, and a summary bar chart of all
    runs is written under charts/ at the end.
    """
    data_set, kernel_type, degree_start, degree_end, degree_step, \
        gamma_start, gamma_end, gamma_step, coef_start, coef_end, coef_step, multiple_classification = get_args()
    final_list_elems = []  # one text label per grid point (kernel params)
    final_list_vals = []   # matching metric value per grid point
    strategy = ''
    if multiple_classification == 'one_vs_one':
        strategy += '1v1'
    elif multiple_classification == "one_vs_all":
        strategy += '1va'
    # Exhaustive grid over gamma, coef0 and polynomial degree (CLI ranges).
    for gamma in drange(gamma_start, gamma_end, gamma_step):
        for coef in drange(coef_start, coef_end, coef_step):
            for degree in range(degree_start, degree_end + 1, degree_step):
                # libsvm option string, e.g. ' -t 2  -g 0.1 -r 0 -d 1'
                kernel_param = ' ' + kernel_type + ' -g ' + str(gamma) + ' -r ' + str(coef) + ' -d ' + str(degree)
                # NOTE(review): the data set is re-read on every grid point;
                # this could be hoisted out of the loops.
                y, x = svm_read_problem(data_set)
                X_train, X_test, y_train, y_test = \
                    train_test_split(x, y, test_size=.40, random_state=42)
                uni_labels = unique_labels([int(i) for i in y_train])
                #print(uni_labels)
                if data_set == 'skin_nonskin':
                    # Binary problem: train and evaluate with libsvm directly.
                    m = svm_train(y_train, X_train, kernel_param)
                    p_label, p_acc, p_val = svm_predict(y_test, X_test, m)
                    # NOTE(review): in libsvm p_acc[0] is presumably the
                    # accuracy percentage, yet the label says 'err' — confirm
                    # which metric is intended here.
                    final_list_elems.append(kernel_param + ' err: ' + str(p_acc[0]))
                    final_list_vals.append(p_acc[1])
                    plot_confusion_matrix(strategy, [int(i) for i in y_test], [int(i) for i in p_label], [1, 2], kernel_param, normalize=True,
                                          title='Normalized confusion matrix')
                else:
                    # Multi-class case: evaluate on the news20 test file.
                    # NOTE(review): this overwrites the y_test produced by
                    # train_test_split above.
                    y_test, x_test = svm_read_problem('news20.t.scale')
                    if multiple_classification == 'one_vs_one':
                        # one-vs-one is not implemented for this path — bail out.
                        return
                    elif multiple_classification == "one_vs_all":
                        # Build one binary (class-i vs rest) label set per class.
                        cor_data_set = []
                        for i in range(1, len(uni_labels) + 1):
                            cor_data_set.append(get_data_set(i, y))
                        # Train one binary model per class on the full data.
                        m = []
                        for i in range(len(uni_labels)):
                            m.append(svm_train(cor_data_set[i], x, kernel_param))
                        # vote_lists[sample][class] counts which binary models
                        # claimed each test sample.
                        vote_lists = []
                        votes = [0] * (len(uni_labels) + 1)
                        for i in range(len(y_test)):
                            vote_lists.append(deepcopy(votes))
                        p_label = None
                        for i in range(len(m)):
                            p_label, p_acc, p_val = svm_predict(y_test, x_test, m[i])
                            #print('PLABEL' + str(p_label) +' \n\n\n\n\n\n\n\n\n\n\n')
                            for j in range(len(p_label)):
                                if int(p_label[j]) != 0:
                                    vote_lists[j][int(p_label[j])] += 1
                        #print(vote_lists)
                        # Resolve votes: keep a class only when exactly one
                        # model claimed the sample; conflicts or no claim -> 0.
                        for i in range(len(p_label)):
                            chosen_class = 0
                            found = False
                            for j in range(1, len(votes)):
                                if vote_lists[i][j] != 0:
                                    if found == True:
                                        chosen_class = 0
                                        break
                                    chosen_class = j
                                    found = True
                            p_label[i] = chosen_class
                        err = get_error(y_test, p_label)
                        final_list_elems.append(strategy + ' ' + kernel_param)
                        final_list_vals.append(err)
                        plot_confusion_matrix(strategy, [int(i) for i in y_test], [int(i) for i in p_label], lala, kernel_param, normalize=True,
                                              title='Normalized confusion matrix')
                    else:
                        print("invalid strategy")
                        return
    # Summary horizontal bar chart covering every grid point that ran.
    plt.rcdefaults()
    fig, ax = plt.subplots(figsize=(55, 55))
    y_pos = np.array(final_list_elems)
    performance = np.array(final_list_vals)
    ax.barh(y_pos, performance, align='center',
            color='green', ecolor='black')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(final_list_elems)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Performance')
    ax.set_title('How fast do you want to go today?')
    plt.savefig('charts/' + strategy + kernel_type + ' error' + '.png')
    print('final')


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement