import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

def softmax(class_scores):
    """
    Calculate the class probability distribution for each digit from the given class scores.
    :param class_scores: raw class scores, shape (m, num_classes)
    :return: probability distribution over the classes, same shape
    """
    class_scores -= np.max(class_scores, axis=1, keepdims=True)  # subtract the row-wise max for numerical stability
    return np.exp(class_scores) / np.sum(np.exp(class_scores), axis=1, keepdims=True)
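
# loss() further below calls class_scores(X, theta), which is not part of this paste.
# A minimal sketch of what it is assumed to do (plain linear scores, no bias term):
def class_scores(X, theta):
    """
    Compute raw class scores of a linear classifier.
    :param X: data matrix with shape (m, d)
    :param theta: weight matrix with shape (d, num_classes)
    :return: score matrix with shape (m, num_classes)
    """
    return np.dot(X, theta)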

def onehot_encode_label(label):
    """
    Support function to convert a label vector into a one-hot encoding matrix.
    :param label: array with shape (D,), D can be whatever you want
    :return: one-hot encoding matrix
    """
    onehot_encoder = OneHotEncoder(sparse=False)  # newer scikit-learn versions use sparse_output=False instead
    label = label.reshape(len(label), 1)
    onehot_encoded_label = onehot_encoder.fit_transform(label)
    return onehot_encoded_label
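
# Small illustrative check (labels chosen arbitrarily): every row of the encoding
# contains exactly one 1, in the column that corresponds to its label.
_demo_onehot = onehot_encode_label(np.array([0, 2, 1]))
assert _demo_onehot.shape == (3, 3) and np.all(_demo_onehot.sum(axis=1) == 1)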

def data_loss(class_probabilities, onehot_encoded_label):
    """
    Compute the data loss L_i for the correct class with a one-hot encoded label.
    :param class_probabilities: probabilities from the softmax function
    :param onehot_encoded_label: correct labels in one-hot encoded form
    :return: the data loss L_i
    """
    return onehot_encoded_label * -np.log(class_probabilities)
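
# The element-wise product with the one-hot labels keeps only -log(p_correct) for each
# sample and zeros out every other class. Illustration with made-up numbers:
_demo_probs = np.array([[0.7, 0.2, 0.1]])
_demo_label = np.array([[1.0, 0.0, 0.0]])
assert np.isclose(data_loss(_demo_probs, _demo_label).sum(), -np.log(0.7))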

def loss(X, y, theta, lam):
    """
    :param X: data
    :param y: labels of the data
    :param theta: learnable parameters
    :param lam: regularization factor
    :return: loss and gradient as a tuple
    """
    encoded_labels = onehot_encode_label(y)                # also needed for the gradient, therefore computed separately
    probabilities = softmax(class_scores(X, theta))        # also needed for the gradient, therefore computed separately
    loss_Li = data_loss(probabilities, encoded_labels)
    m = X.shape[0]                                         # number of training samples for normalization
    l2_regularization = (lam / 2) * np.sum(theta * theta)  # regularization loss
    loss = np.sum(loss_Li) / m + l2_regularization
    dl2 = lam * theta                                      # gradient of the regularization term
    dloss = np.dot(X.T, (probabilities - encoded_labels) / m)  # gradient of the data loss
    gradient = dloss + dl2
    return loss, gradient
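
# Optional debugging sketch (not in the original paste): compare the analytic gradient of
# loss() against a centred finite-difference estimate on a few random entries of theta.
def gradient_check(X, y, theta, lam, num_checks=5, eps=1e-5):
    _, analytic_grad = loss(X, y, theta, lam)
    for _ in range(num_checks):
        idx = tuple(np.random.randint(s) for s in theta.shape)
        theta[idx] += eps
        loss_plus, _ = loss(X, y, theta, lam)
        theta[idx] -= 2 * eps
        loss_minus, _ = loss(X, y, theta, lam)
        theta[idx] += eps  # restore the original value
        numeric_grad = (loss_plus - loss_minus) / (2 * eps)
        print('analytic: %f, numeric: %f' % (analytic_grad[idx], numeric_grad))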

def sgd(training_data, training_label, theta, lam=0.5, iterations=100, learning_rate=1e-5, batch_size=256):
    """
    Mini-batch stochastic gradient descent: in every iteration, draw a random batch,
    compute loss and gradient on it, and take a step against the gradient.
    """
    losses = []
    for i in range(iterations):
        shuffle_index = np.random.permutation(training_data.shape[0])
        data, label = training_data[shuffle_index], training_label[shuffle_index]
        data, label = data[:batch_size], label[:batch_size]   # take a random mini-batch
        l, grad = loss(data, label, theta, lam)
        losses.append(l)
        theta -= learning_rate * grad                          # gradient descent step
    return theta, losses
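
# Helper for a quick accuracy check at the end of the script (not in the original paste):
# the predicted class is the argmax of the linear scores.
def predict(X, theta):
    return np.argmax(np.dot(X, theta), axis=1)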

# Initialize the learnable parameters theta with zeros.
# X_train / y_train (flattened 28x28 digit images and their labels) are assumed to be loaded already.
theta = np.zeros([X_train.shape[1], len(np.unique(y_train))])
# Start the optimization with the training data, theta and optional hyperparameters
opt_model, loss_history = sgd(X_train, y_train, theta, iterations=250)

# Evaluation
print('last iteration loss:', loss_history[-1])
print('first iteration loss:', loss_history[0])
# With theta initialized to zeros, the softmax is uniform over the 10 classes, so the very
# first loss should be -log(1/10) = ln(10). If this prints False, there is an implementation error.
print('Is the first loss equal to ln(10)?', np.abs(np.log(10) - loss_history[0]) < 1e-6)

# plot a loss curve
plt.plot(loss_history)
plt.ylabel('loss')
plt.xlabel('iterations')
plt.show()

# plot weights
plt.figure(figsize=(20, 20))
num_classes = 10
for c in range(num_classes):
    f = plt.subplot(10, num_classes, 1 * num_classes + c + 1)
    f.axis('off')
    plt.imshow(np.reshape(opt_model[:, c], [28, 28]))
plt.show()
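
# Rough accuracy check on the training data using the predict() sketch above
# (assumes X_train / y_train are the same arrays used for the optimization):
print('training accuracy:', np.mean(predict(X_train, opt_model) == y_train))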