Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def inner_algo_classification(n_dims, inner_regul_param, representation_d, features, labels, inner_algo_method='algo_w', n_classes=None, train_plot=0):
- representation_d_inv = np.linalg.pinv(representation_d)
- total_n_points = features.shape[0]
- if inner_algo_method == 'algo_w':
- def multiclass_hinge_loss(curr_features, curr_labels, weight_matrix):
- pred_scores = curr_features @ weight_matrix
- indicator_part = np.ones(pred_scores.shape)
- indicator_part[np.arange(pred_scores.shape[0]), curr_labels] = 0
- true = pred_scores[np.arange(pred_scores.shape[0]), curr_labels].reshape(-1, 1)
- true = np.tile(true, (1, 4))
- loss = np.max(indicator_part + pred_scores - true, axis=1)
- loss = np.sum(loss) / total_n_points
- return loss
- def penalty(weight_matrix):
- penalty_output = inner_regul_param / 2 * np.trace(weight_matrix.T @ representation_d_inv @ weight_matrix)
- return penalty_output
- def subgradient(label, feature, weight_matrix):
- pred_scores = feature @ weight_matrix
- indicator_part = np.ones(pred_scores.shape)
- indicator_part[label] = 0
- true = pred_scores[label]
- true = np.tile(true, (1, 4))
- j_star = np.argmax(indicator_part + pred_scores - true)
- subgrad = np.zeros(weight_matrix.shape)
- if label != j_star:
- subgrad[:, label] = -feature
- subgrad[:, j_star] = feature
- return subgrad
- else:
- raise ValueError("Unknown inner algorithm.")
- curr_weight_matrix = np.zeros((n_dims, n_classes))
- moving_average_weights = curr_weight_matrix
- obj = []
- subgradients = []
- curr_epoch_obj = 10**10
- big_fucking_counter = 0
- for epoch in range(1):
- subgradients = []
- prev_epoch_obj = curr_epoch_obj
- shuffled_points = np.random.permutation(range(features.shape[0]))
- for curr_point_idx, curr_point in enumerate(shuffled_points):
- big_fucking_counter = big_fucking_counter + 1
- prev_weight_matrix = curr_weight_matrix
- # Compute subgradient
- s = subgradient(labels[curr_point], features[curr_point], prev_weight_matrix)
- subgradients.append(s)
- # Update
- step = 1 / (inner_regul_param * (epoch * len(shuffled_points) + curr_point_idx + 1 + 1))
- full_subgrad = representation_d @ s + inner_regul_param * prev_weight_matrix
- curr_weight_matrix = prev_weight_matrix - step * full_subgrad
- moving_average_weights = (moving_average_weights * (big_fucking_counter + 1) + curr_weight_matrix * 1) / (big_fucking_counter + 2)
- curr_obj = multiclass_hinge_loss(features, labels, curr_weight_matrix) + penalty(curr_weight_matrix)
- obj.append(curr_obj)
- # print('epoch %5d | obj: %10.5f | step: %16.10f' % (epoch, obj[-1], step))
- curr_epoch_obj = obj[-1]
- conv = np.abs(curr_epoch_obj - prev_epoch_obj) / prev_epoch_obj
- if conv < 1e-8:
- # print('BREAKING epoch %5d | obj: %10.5f | step: %16.10f' % (epoch, obj[-1], step))
- break
- if train_plot == 1:
- plt.figure(999)
- plt.clf()
- plt.plot(obj)
- plt.pause(0.25)
- final_subgradient = np.sum(subgradients, axis=0)
- return final_subgradient, moving_average_weights, obj
- ########################################################################################
- ########################################################################################
- ########################################################################################
- ########################################################################################
# Meta-algorithm step: approximate the gradient of the inner objective with respect to the representation D.
# NOTE(review): fragment of a larger method — the enclosing `def` is outside
# this chunk, and `self`, `data`, `task` and `loss_subgradient` are defined
# there.  Kept byte-identical; only comments added.
- # Approximate the gradient
# Dataset-specific branch: for 'miniwikipedia' the subgradient is presumably
# already in representation space, so it is used directly — TODO confirm
# against the data pipeline.
- if self.data_info.dataset == 'miniwikipedia':
- g = loss_subgradient
- gg = g @ g.T
# Otherwise, project the subgradient through the task's training features
# first, then form the rank-one outer product.
- else:
- g = data.features_tr[task].T @ loss_subgradient
- gg = np.outer(g, g)
# Negative scaled outer product; the 1/n^2 factor normalizes by the squared
# number of training points of the task.
- approx_grad = - gg / (2 * self.inner_regul_param * data.features_tr[task].shape[0] ** 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement