import numpy as np

class Softmax:
  # ...

  def backprop(self, d_L_d_out):
    '''
    Performs a backward pass of the softmax layer.
    Returns the loss gradient for this layer's inputs.
    - d_L_d_out is the loss gradient for this layer's outputs.
    '''
    # We know only 1 element of d_L_d_out will be nonzero.
    for i, gradient in enumerate(d_L_d_out):
      if gradient == 0:
        continue

      # e^totals
      t_exp = np.exp(self.last_totals)

      # Sum of all e^totals
      S = np.sum(t_exp)

      # Gradients of out[i] against totals
      d_out_d_t = -t_exp[i] * t_exp / (S ** 2)
      d_out_d_t[i] = t_exp[i] * (S - t_exp[i]) / (S ** 2)
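      # These two lines follow from the quotient rule on out[i] = t_exp[i] / S:
      #   d(out[i]) / d(t[j]) = -t_exp[i] * t_exp[j] / S**2        for j != i
      #   d(out[i]) / d(t[i]) =  t_exp[i] * (S - t_exp[i]) / S**2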

      # Gradients of totals against weights/biases/input
      d_t_d_w = self.last_input
      d_t_d_b = 1
      d_t_d_inputs = self.weights

      # Gradients of loss against totals
      d_L_d_t = gradient * d_out_d_t

      # Gradients of loss against weights/biases/input
      d_L_d_w = d_t_d_w[np.newaxis].T @ d_L_d_t[np.newaxis]
      d_L_d_b = d_L_d_t * d_t_d_b
      d_L_d_inputs = d_t_d_inputs @ d_L_d_t

      # ... to be continued
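
The method above relies on self.last_input, self.last_totals, and self.weights being cached by an earlier forward pass, which this paste omits. The sketch below is one assumed way such a constructor and forward pass could look, followed by an illustrative d_L_d_out for a cross-entropy loss. The shapes, the weight initialization, and everything not referenced by backprop() above are assumptions for illustration, not part of the original paste.

import numpy as np

class Softmax:
  # Assumed fully-connected layer with a softmax activation.
  def __init__(self, input_len, nodes):
    # Scale the initial weights down to keep the starting totals small.
    self.weights = np.random.randn(input_len, nodes) / input_len
    self.biases = np.zeros(nodes)

  def forward(self, input):
    # Flatten the input and cache the values that backprop() needs.
    input = input.flatten()
    self.last_input = input

    totals = np.dot(input, self.weights) + self.biases
    self.last_totals = totals

    # Softmax over the totals.
    exp = np.exp(totals)
    return exp / np.sum(exp)

  # backprop() from the paste above would go here.

# Illustrative shapes: a 13x13x8 input volume and 10 classes.
softmax = Softmax(13 * 13 * 8, 10)
out = softmax.forward(np.random.randn(13, 13, 8))

# For a cross-entropy loss L = -ln(out[c]) with correct class c, the gradient
# with respect to the outputs is zero everywhere except at index c, which is
# why backprop() can skip every zero entry of d_L_d_out.
c = 3
d_L_d_out = np.zeros(10)
d_L_d_out[c] = -1 / out[c]
# d_L_d_out would then be passed to backprop().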