import numpy as np
import matplotlib.pyplot as plt


def inner_algo_classification(n_dims, inner_regul_param, representation_d, features, labels,
                              inner_algo_method='algo_w', n_classes=None, train_plot=0):
    representation_d_inv = np.linalg.pinv(representation_d)
    total_n_points = features.shape[0]

    if inner_algo_method == 'algo_w':
        def multiclass_hinge_loss(curr_features, curr_labels, weight_matrix):
            # Average multiclass hinge loss: max_j (1[j != y] + score_j - score_y).
            pred_scores = curr_features @ weight_matrix

            indicator_part = np.ones(pred_scores.shape)
            indicator_part[np.arange(pred_scores.shape[0]), curr_labels] = 0

            # Score of the true class for each point; the (n, 1) column broadcasts
            # across the class columns (the original hardcoded 4 classes via np.tile).
            true = pred_scores[np.arange(pred_scores.shape[0]), curr_labels].reshape(-1, 1)

            loss = np.max(indicator_part + pred_scores - true, axis=1)
            loss = np.sum(loss) / total_n_points
            return loss

        def penalty(weight_matrix):
            # Regularizer: (lambda / 2) * tr(W^T D^+ W).
            penalty_output = inner_regul_param / 2 * np.trace(weight_matrix.T @ representation_d_inv @ weight_matrix)
            return penalty_output

        def subgradient(label, feature, weight_matrix):
            # Subgradient of the multiclass hinge loss at a single point.
            pred_scores = feature @ weight_matrix

            indicator_part = np.ones(pred_scores.shape)
            indicator_part[label] = 0

            # Score of the true class; the scalar broadcasts against pred_scores
            # (the original hardcoded 4 classes via np.tile).
            true = pred_scores[label]

            j_star = np.argmax(indicator_part + pred_scores - true)

            subgrad = np.zeros(weight_matrix.shape)
            if label != j_star:
                subgrad[:, label] = -feature
                subgrad[:, j_star] = feature
            return subgrad
    else:
        raise ValueError("Unknown inner algorithm.")

    curr_weight_matrix = np.zeros((n_dims, n_classes))
    moving_average_weights = curr_weight_matrix
    obj = []
    subgradients = []

    curr_epoch_obj = 10 ** 10
    update_counter = 0
    for epoch in range(1):
        subgradients = []
        prev_epoch_obj = curr_epoch_obj
        shuffled_points = np.random.permutation(range(features.shape[0]))

        for curr_point_idx, curr_point in enumerate(shuffled_points):
            update_counter = update_counter + 1
            prev_weight_matrix = curr_weight_matrix

            # Compute subgradient
            s = subgradient(labels[curr_point], features[curr_point], prev_weight_matrix)
            subgradients.append(s)

            # Update (Pegasos-style step size 1 / (lambda * (t + 1)))
            step = 1 / (inner_regul_param * (epoch * len(shuffled_points) + curr_point_idx + 1 + 1))
            full_subgrad = representation_d @ s + inner_regul_param * prev_weight_matrix
            curr_weight_matrix = prev_weight_matrix - step * full_subgrad

            # Running average of the iterates
            moving_average_weights = (moving_average_weights * (update_counter + 1) + curr_weight_matrix) / (update_counter + 2)

        curr_obj = multiclass_hinge_loss(features, labels, curr_weight_matrix) + penalty(curr_weight_matrix)
        obj.append(curr_obj)
        # print('epoch %5d | obj: %10.5f | step: %16.10f' % (epoch, obj[-1], step))
        curr_epoch_obj = obj[-1]
        conv = np.abs(curr_epoch_obj - prev_epoch_obj) / prev_epoch_obj
        if conv < 1e-8:
            # print('BREAKING epoch %5d | obj: %10.5f | step: %16.10f' % (epoch, obj[-1], step))
            break
    if train_plot == 1:
        plt.figure(999)
        plt.clf()
        plt.plot(obj)
        plt.pause(0.25)

    final_subgradient = np.sum(subgradients, axis=0)

    return final_subgradient, moving_average_weights, obj
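A minimal smoke test (not from the original paste): synthetic 4-class data with an identity representation, just to show the call signature and expected shapes. All names and hyperparameter values below are illustrative assumptions.

# --- Hedged usage sketch: everything here is assumed, not from the paste ---
np.random.seed(0)
n, d, k = 200, 10, 4                      # points, input dims, classes (illustrative)
features = np.random.randn(n, d)
labels = np.argmax(features @ np.random.randn(d, k), axis=1)
representation_d = np.eye(d)              # identity representation as a sanity check

final_subgrad, avg_weights, obj = inner_algo_classification(
    n_dims=d, inner_regul_param=0.1, representation_d=representation_d,
    features=features, labels=labels, n_classes=k)
print('final objective: %.5f' % obj[-1])  # final_subgrad and avg_weights are (d, k)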
########################################################################################
########################################################################################
########################################################################################
########################################################################################

Meta algo bit:
# Approximate the gradient
if self.data_info.dataset == 'miniwikipedia':
    g = loss_subgradient
    gg = g @ g.T
else:
    g = data.features_tr[task].T @ loss_subgradient
    gg = np.outer(g, g)
approx_grad = - gg / (2 * self.inner_regul_param * data.features_tr[task].shape[0] ** 2)
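For context, a sketch of how approx_grad might be consumed (the paste does not show this): a projected meta-gradient step on the representation matrix. The step size, the psd_projection helper, and the dummy shapes are all assumptions.

# --- Hedged sketch: one possible meta-step using approx_grad (assumed, not from the paste) ---
def psd_projection(matrix):
    # Symmetrize, then clip negative eigenvalues at zero to land on the PSD cone.
    sym = (matrix + matrix.T) / 2
    eigvals, eigvecs = np.linalg.eigh(sym)
    return eigvecs @ np.diag(np.maximum(eigvals, 0)) @ eigvecs.T

# Illustrative shapes; in the real code approx_grad comes from the snippet above.
d = 10
representation_d = np.eye(d)
approx_grad = np.random.randn(d, d)
meta_step_size = 0.01  # illustrative value
representation_d = psd_projection(representation_d - meta_step_size * approx_grad)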