import numpy as np


def update_moving(previous, current, alpha):
    # Exponential moving average with momentum `alpha`;
    # seeded with the first observed value.
    if previous is None:
        return current
    return previous * alpha + current * (1 - alpha)
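

# The framework's `Module` base class is not included in this paste.
# The stub below is a minimal stand-in, assuming the usual interface
# (a `training` flag, train()/evaluate() switches, and forward/backward
# delegating to updateOutput/updateGradInput); swap in the real base
# class if you have it.
class Module(object):
    def __init__(self):
        self.output = None
        self.gradInput = None
        self.training = True

    def forward(self, input):
        return self.updateOutput(input)

    def backward(self, input, gradOutput):
        return self.updateGradInput(input, gradOutput)

    def train(self):
        self.training = True

    def evaluate(self):
        self.training = False
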
class BatchNormalization(Module):
    EPS = 1e-3

    def __init__(self, alpha=0.):
        # alpha is the EMA momentum for the running statistics;
        # the default 0. keeps only the latest batch's statistics.
        super(BatchNormalization, self).__init__()
        self.alpha = alpha
        self.moving_mean = None
        self.moving_variance = None
        self.mus = None        # batch means cached by the forward pass
        self._stds = None      # per-feature *variances*, despite the name
        self.sqrtVal = None    # sqrt(variance + EPS), reused in backward
        self.xcentered = None  # input minus means, reused in backward
    def updateOutput(self, input):
        if self.training:
            means = np.mean(input, axis=0)
            # Biased (divide-by-N) variance of the current batch.
            self._stds = np.mean((input - means[np.newaxis, ...]) ** 2, axis=0)
            self.moving_mean = update_moving(self.moving_mean, means, self.alpha)
            # The running variance stores the unbiased (Bessel-corrected) estimate.
            self.moving_variance = update_moving(
                self.moving_variance,
                self._stds * (len(input) / (len(input) - 1)),
                self.alpha,
            )
        else:
            means = self.moving_mean
            self._stds = self.moving_variance
        self.mus = means
        self.xcentered = input - means[np.newaxis, ...]
        self.sqrtVal = np.sqrt(self._stds + self.EPS)
        self.output = self.xcentered / self.sqrtVal
        return self.output
    def updateGradInput(self, input, gradOutput):
        if self.training:
            N = len(input)
            sqr = self.sqrtVal[np.newaxis, :, np.newaxis]  # (1, D, 1)
            # Mean-centering part of the Jacobian:
            # (delta_ij - 1/N) / sqrt(var + EPS).
            numerator = (np.identity(N) - np.ones((N, N)) / N)[:, np.newaxis, :]
            first_summand = numerator / sqr                # (N, D, N)
            mus = self.mus[np.newaxis, :, np.newaxis]      # (1, D, 1)
            # Variance part: (x_i - mu)(x_j - mu) / (N * (var + EPS)^(3/2)).
            second_numerator = (
                (input[:, :, np.newaxis] - mus)
                * (np.transpose(input)[np.newaxis, :, :] - mus)
                / N
            )
            second_summand = second_numerator / sqr ** 3
            # The variance term enters with a minus sign, since the derivative
            # of 1/sqrt(var + EPS) with respect to the input is negative.
            self.gradInput = np.transpose(
                np.sum(gradOutput[:, :, np.newaxis]
                       * (first_summand - second_summand), axis=0)
            )
        else:
            # In evaluation mode the statistics are constants, so the
            # Jacobian reduces to a per-feature rescaling.
            self.gradInput = gradOutput / np.sqrt(self._stds[np.newaxis, ...] + self.EPS)
        return self.gradInput
    def __repr__(self):
        return "BatchNormalization"