# Paste-site header (not part of the code): a guest, Jun 18th, 2019, 52, Never
class Softmax:
  '''
  Softmax layer (excerpt: only the backward pass is shown).

  backprop reads these attributes, which are set outside this excerpt:
  - last_totals: the pre-softmax totals used by this backward pass
  - last_input: the input vector used by this backward pass
    (presumably flattened to 1D - confirm against the forward pass)
  - last_input_shape: the shape the returned gradient is reshaped to
  - weights, biases: the parameters, updated in place
  '''
  # ...

  def backprop(self, d_L_d_out, learn_rate):
    '''
    Performs a backward pass of the softmax layer.
    Updates self.weights and self.biases in place with one gradient
    descent step, and returns the loss gradient for this layer's inputs,
    reshaped to self.last_input_shape.
    - d_L_d_out is the loss gradient for this layer's outputs. It is
      typically one-hot (a single nonzero entry), but any gradient
      vector is handled, including an all-zero one (which leaves the
      parameters unchanged and returns a zero gradient).
    - learn_rate is a float
    '''
    d_L_d_out = np.asarray(d_L_d_out, dtype=float)

    # Softmax probabilities. Subtracting the max total before
    # exponentiating does not change the result (softmax is
    # shift-invariant) but keeps np.exp from overflowing to inf, which
    # would otherwise turn every gradient into NaN.
    t_exp = np.exp(self.last_totals - np.max(self.last_totals))
    probs = t_exp / np.sum(t_exp)

    # Gradients of loss against totals.
    # d out[i] / d t[j] = p[i] * ((i == j) - p[j]), so summing over i:
    #   d_L_d_t[j] = p[j] * (d_L_d_out[j] - sum_i d_L_d_out[i] * p[i])
    # For a one-hot d_L_d_out this equals the per-element formulas
    #   -g * p[i] * p[j] (j != i)   and   g * p[i] * (1 - p[i]) (j == i).
    d_L_d_t = probs * (d_L_d_out - np.dot(d_L_d_out, probs))

    # Gradients of loss against weights/biases/input.
    # totals = input @ weights + biases, hence d_t_d_w = input,
    # d_t_d_b = 1 and d_t_d_inputs = weights.
    d_L_d_w = np.outer(self.last_input, d_L_d_t)
    d_L_d_b = d_L_d_t
    # Must be computed before the update below so that it uses the
    # weights that produced last_totals.
    d_L_d_inputs = self.weights @ d_L_d_t

    # Update weights / biases
    self.weights -= learn_rate * d_L_d_w
    self.biases -= learn_rate * d_L_d_b

    return d_L_d_inputs.reshape(self.last_input_shape)
