import sys

from argparse import ArgumentParser
from copy import deepcopy

# LIBSVM's Python bindings are loaded from a local checkout.
sys.path.append('/home/tiberiu/tema2_ml/libsvm-master/python')
from svmutil import *

import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels

# Class labels of the news20 data set (20 classes), used for its confusion matrix.
news20_classes = list(range(1, 21))

def plot_confusion_matrix(strategy, y_true, y_pred, classes, kernel_param,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """
    Print and plot the confusion matrix, then save it under charts/.
    Normalization can be applied by setting `normalize=True`.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute the confusion matrix over the caller-supplied class list so its
    # shape always matches the tick labels below.
    cm = confusion_matrix(y_true, y_pred, labels=classes)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    fig, ax = plt.subplots(figsize=(55, 55))
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # Show all ticks and label them with the class names.
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    # The charts/ output directory must already exist.
    plt.savefig('charts/' + strategy + kernel_param + '.png')
    plt.close(fig)
    return ax

def drange(x, y, jump):
    """Like range(), but with float steps; inclusive of the upper bound."""
    while x <= y:
        yield float(x)
        x += jump

def get_args():
    parser = ArgumentParser()

    parser.add_argument("--data_set", type=str, default='skin_nonskin',
                        help="Data set.")

    parser.add_argument("--kernel_type", type=str, default=' -t 2 ',
                        help="Kernel type (LIBSVM -t option).")

    parser.add_argument("--degree_start", type=int, default=1,
                        help="Degree start")

    parser.add_argument("--degree_end", type=int, default=100,
                        help="Degree end")

    parser.add_argument("--degree_step", type=int, default=20,
                        help="Degree step")

    parser.add_argument("--gamma_start", type=float, default=0.1,
                        help="Gamma start")

    parser.add_argument("--gamma_end", type=float, default=5,
                        help="Gamma end")

    parser.add_argument("--gamma_step", type=float, default=0.9,
                        help="Gamma step")

    parser.add_argument("--coef_start", type=float, default=0,
                        help="Coef start")

    parser.add_argument("--coef_end", type=float, default=100,
                        help="Coef end")

    parser.add_argument("--coef_step", type=float, default=20,
                        help="Coef step")

    parser.add_argument("--type_of_multiple", type=str, default='one_vs_one',
                        help='Type of multi-class strategy (one_vs_one or one_vs_all)')

    args = parser.parse_args()

    return (args.data_set, args.kernel_type,
            args.degree_start, args.degree_end, args.degree_step,
            args.gamma_start, args.gamma_end, args.gamma_step,
            args.coef_start, args.coef_end, args.coef_step,
            args.type_of_multiple)

def get_data_set(i, train):
    """Build one-vs-rest labels: keep label i, map every other class to 0."""
    new_labels = [int(idx) if int(i) == idx else 0 for idx in train]
    return new_labels


def get_error(y_test, p_label):
    """Fraction of predictions that differ from the reference labels."""
    assert len(y_test) == len(p_label), 'y_test and p_label must have the same length'
    false = 0
    for i in range(len(y_test)):
        if y_test[i] != p_label[i]:
            false += 1

    return false / len(y_test)

def main():
    data_set, kernel_type, degree_start, degree_end, degree_step, \
        gamma_start, gamma_end, gamma_step, coef_start, coef_end, coef_step, \
        multiple_classification = get_args()

    final_list_elems = []
    final_list_vals = []

    strategy = ''
    if multiple_classification == 'one_vs_one':
        strategy += '1v1'
    elif multiple_classification == "one_vs_all":
        strategy += '1va'
    for gamma in drange(gamma_start, gamma_end, gamma_step):
        for coef in drange(coef_start, coef_end, coef_step):
            for degree in range(degree_start, degree_end + 1, degree_step):
                kernel_param = ' ' + kernel_type + ' -g ' + str(gamma) + ' -r ' + str(coef) + ' -d ' + str(degree)
                y, x = svm_read_problem(data_set)
                X_train, X_test, y_train, y_test = \
                    train_test_split(x, y, test_size=.40, random_state=42)

                uni_labels = unique_labels([int(i) for i in y_train])

                if data_set == 'skin_nonskin':
                    # Binary problem: train and evaluate on the 60/40 split.
                    m = svm_train(y_train, X_train, kernel_param)
                    p_label, p_acc, p_val = svm_predict(y_test, X_test, m)
                    # p_acc = (accuracy %, mean squared error, squared correlation);
                    # with labels 1/2 the MSE equals the misclassification rate.
                    final_list_elems.append(kernel_param + ' acc: ' + str(p_acc[0]))
                    final_list_vals.append(p_acc[1])
                    plot_confusion_matrix(strategy, [int(i) for i in y_test],
                                          [int(i) for i in p_label], [1, 2],
                                          kernel_param, normalize=True,
                                          title='Normalized confusion matrix')

                else:
                    # Multi-class problem: train on the full training file and
                    # evaluate on the separate test file.
                    y_test, x_test = svm_read_problem('news20.t.scale')
                    if multiple_classification == 'one_vs_one':
                        # Not handled here: LIBSVM already trains multi-class
                        # models one-vs-one internally.
                        return
                    elif multiple_classification == "one_vs_all":
                        # One binary problem per class: label i versus 0 (the rest).
                        cor_data_set = []
                        for i in range(1, len(uni_labels) + 1):
                            cor_data_set.append(get_data_set(i, y))
                        m = []
                        for i in range(len(uni_labels)):
                            m.append(svm_train(cor_data_set[i], x, kernel_param))

                        # One vote counter per test sample, indexed by class (1..K).
                        vote_lists = []
                        votes = [0] * (len(uni_labels) + 1)
                        for i in range(len(y_test)):
                            vote_lists.append(deepcopy(votes))

                        p_label = None

                        # Each per-class model votes for the samples it claims.
                        for i in range(len(m)):
                            p_label, p_acc, p_val = svm_predict(y_test, x_test, m[i])
                            for j in range(len(p_label)):
                                if int(p_label[j]) != 0:
                                    vote_lists[j][int(p_label[j])] += 1

                        # A sample gets a class only if exactly one model claimed
                        # it; otherwise it stays 0 (unresolved).
                        for i in range(len(p_label)):
                            chosen_class = 0
                            found = False
                            for j in range(1, len(votes)):
                                if vote_lists[i][j] != 0:
                                    if found:
                                        chosen_class = 0
                                        break
                                    chosen_class = j
                                    found = True
                            p_label[i] = chosen_class

                        err = get_error(y_test, p_label)
                        final_list_elems.append(strategy + ' ' + kernel_param)
                        final_list_vals.append(err)
                        plot_confusion_matrix(strategy, [int(i) for i in y_test],
                                              [int(i) for i in p_label], news20_classes,
                                              kernel_param, normalize=True,
                                              title='Normalized confusion matrix')

                    else:
                        print("invalid strategy")
                        return

    # Summary bar chart over all tried kernel configurations.
    plt.rcdefaults()
    fig, ax = plt.subplots(figsize=(55, 55))

    y_pos = np.arange(len(final_list_elems))
    performance = np.array(final_list_vals)

    ax.barh(y_pos, performance, align='center',
            color='green', ecolor='black')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(final_list_elems)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Performance')
    ax.set_title('Error per kernel configuration')
    plt.savefig('charts/' + strategy + kernel_type + ' error' + '.png')
    print('final')


if __name__ == "__main__":
    main()
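# Example invocation (a sketch only: the script file name below is an assumption,
# and the data files plus a charts/ output directory must already exist):
#   python this_script.py --data_set skin_nonskin --kernel_type ' -t 2 ' \
#       --gamma_start 0.1 --gamma_end 5 --gamma_step 0.9
#   python this_script.py --data_set <news20 training file> --type_of_multiple one_vs_all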