Advertisement
Guest User

Untitled

a guest
Oct 15th, 2019
140
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.06 KB | None | 0 0
  1. from fastai.basics import *
  2.  
  3. class ErrorCallback(LearnerCallback):
  4. def __init__(self, lrn:Learner):
  5. super().__init__(lrn)
  6. self.err_loss,self.err_input,self.err_output = None,None,None
  7.  
  8. def on_train_begin(self, **kwargs):
  9. def hook(mod, inps, outs):
  10. nfs = []
  11. for inp in inps:
  12. if inp is None: continue
  13. inp = inp.detach()
  14. nfs.append((
  15. (inp == inp.new_full((1,), np.inf)).sum().cpu(), # Count non-finites
  16. (inp == inp.new_full((1,), np.nan)).sum().cpu() # On GPU so don't check yet
  17. ))
  18. return (mod, nfs)
  19. self.module_names = {m: n for n,m in iter_children(mdl_mish)}
  20. self.hooks = callbacks.Hooks([m for m in self.module_names.keys() if hasattr(m, 'weight')],
  21. hook, is_forward=False, detach=False)
  22.  
  23. def on_batch_end(self, num_batch, last_loss, last_input, last_output, pbar, **kwargs):
  24. if not np.isfinite(last_loss) and self.err_loss is None:
  25. self.err_loss,self.err_input,self.err_output = last_loss,last_input,last_output
  26. pbar.write(f"Non-finite loss on batch {num_batch}")
  27. return {'stop_epoch': True, 'stop_training': True}
  28.  
  29. def on_backward_end(self, num_batch, last_loss, last_input, last_output, pbar, **kwargs):
  30. for mod,nfs in self.hooks.stored:
  31. infs,nans = 0,0
  32. for inf,nan in nfs:
  33. infs += inf
  34. nans += nan
  35. if infs or nans:
  36. name = self.module_names[mod]
  37. pbar.write(f"Non-finite gradients on batch {num_batch} from child {name}, {infs} inf, {nans} nan. Aborting.")
  38. self.err_loss,self.err_input,self.err_output = last_loss,last_input,last_output
  39. return {'stop_epoch': True, 'stop_training': True}
  40.  
  41. def on_train_end(self, **kwargs): self.hooks.remove()
  42.  
  43. def on_epoch_end(self, **kwargs):
  44. if self.err_loss is not None: return {'stop_training': True}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement