Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- def inner_algo_classification(n_dims, inner_regul_param, representation_d, features, labels, inner_algo_method='algo_w', n_classes=None, train_plot=0):
- representation_d_inv = np.linalg.pinv(representation_d)
- total_n_points = features.shape[0]
- if inner_algo_method == 'algo_w':
- def multiclass_hinge_loss(curr_features, curr_labels, weight_matrix):
- pred_scores = curr_features @ weight_matrix
- indicator_part = np.ones(pred_scores.shape)
- indicator_part[np.arange(pred_scores.shape[0]), curr_labels] = 0
- true = pred_scores[np.arange(pred_scores.shape[0]), curr_labels].reshape(-1, 1)
- true = np.tile(true, (1, 4))
- loss = np.max(indicator_part + pred_scores - true, axis=1)
- loss = np.sum(loss) / total_n_points
- return loss
- def penalty(weight_matrix):
- penalty_output = inner_regul_param / 2 * np.trace(weight_matrix.T @ representation_d_inv @ weight_matrix)
- return penalty_output
- def subgradient(label, feature, weight_matrix):
- pred_scores = feature @ weight_matrix
- indicator_part = np.ones(pred_scores.shape)
- indicator_part[label] = 0
- true = pred_scores[label]
- true = np.tile(true, (1, 4))
- j_star = np.argmax(indicator_part + pred_scores - true)
- subgrad = np.zeros(weight_matrix.shape)
- if label != j_star:
- subgrad[:, label] = -feature
- subgrad[:, j_star] = feature
- return subgrad
- else:
- raise ValueError("Unknown inner algorithm.")
- curr_weight_matrix = np.zeros((n_dims, n_classes))
- moving_average_weights = curr_weight_matrix
- obj = []
- subgradients = []
- curr_epoch_obj = 10**10
- big_fucking_counter = 0
- for epoch in range(1):
- subgradients = []
- prev_epoch_obj = curr_epoch_obj
- shuffled_points = np.random.permutation(range(features.shape[0]))
- for curr_point_idx, curr_point in enumerate(shuffled_points):
- big_fucking_counter = big_fucking_counter + 1
- prev_weight_matrix = curr_weight_matrix
- # Compute subgradient
- s = subgradient(labels[curr_point], features[curr_point], prev_weight_matrix)
- subgradients.append(s)
- # Update
- step = 1 / (inner_regul_param * (epoch * len(shuffled_points) + curr_point_idx + 1 + 1))
- full_subgrad = representation_d @ s + inner_regul_param * prev_weight_matrix
- curr_weight_matrix = prev_weight_matrix - step * full_subgrad
- moving_average_weights = (moving_average_weights * (big_fucking_counter + 1) + curr_weight_matrix * 1) / (big_fucking_counter + 2)
- curr_obj = multiclass_hinge_loss(features, labels, curr_weight_matrix) + penalty(curr_weight_matrix)
- obj.append(curr_obj)
- # print('epoch %5d | obj: %10.5f | step: %16.10f' % (epoch, obj[-1], step))
- curr_epoch_obj = obj[-1]
- conv = np.abs(curr_epoch_obj - prev_epoch_obj) / prev_epoch_obj
- if conv < 1e-8:
- # print('BREAKING epoch %5d | obj: %10.5f | step: %16.10f' % (epoch, obj[-1], step))
- break
- if train_plot == 1:
- plt.figure(999)
- plt.clf()
- plt.plot(obj)
- plt.pause(0.25)
- final_subgradient = np.sum(subgradients, axis=0)
- return final_subgradient, moving_average_weights, obj
- ########################################################################################
- ########################################################################################
- ########################################################################################
- ########################################################################################
# Meta-algorithm step: approximate the gradient of the inner objective with respect to the representation D.
# NOTE(review): fragment of a larger method — the enclosing `def` is outside
# this chunk, and `self`, `data`, `task` and `loss_subgradient` are defined
# there.  Kept byte-identical; only comments added.
- # Approximate the gradient
# Dataset-specific branch: for 'miniwikipedia' the subgradient is presumably
# already in representation space, so it is used directly — TODO confirm
# against the data pipeline.
- if self.data_info.dataset == 'miniwikipedia':
- g = loss_subgradient
- gg = g @ g.T
# Otherwise, project the subgradient through the task's training features
# first, then form the rank-one outer product.
- else:
- g = data.features_tr[task].T @ loss_subgradient
- gg = np.outer(g, g)
# Negative scaled outer product; the 1/n^2 factor normalizes by the squared
# number of training points of the task.
- approx_grad = - gg / (2 * self.inner_regul_param * data.features_tr[task].shape[0] ** 2)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement