lamiastella

landmarks normalization

Oct 15th, 2020
1,131
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. network = Network()
  2. network.cuda()    
  3.  
  4. criterion = nn.MSELoss()
  5. optimizer = optim.Adam(network.parameters(), lr=0.0001)
  6.  
  7. loss_min = np.inf
  8. num_epochs = 1
  9.  
  10. start_time = time.time()
  11. for epoch in range(1,num_epochs+1):
  12.    
  13.     loss_train = 0
  14.     loss_test = 0
  15.     running_loss = 0
  16.    
  17.    
  18.     network.train()
  19.     print('size of train loader is: ', len(train_loader))
  20.  
  21.     for step in range(1,len(train_loader)+1):
  22.  
  23.        
  24.         batch = next(iter(train_loader))
  25.         images, landmarks = batch['image'], batch['landmarks']
  26.         #RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 600, 800, 3] to have 3 channels, but got 600 channels instead
  27.         #using permute below to fix the above error
  28.         images = images.permute(0,3,1,2)
  29.        
  30.         images = images.cuda()
  31.    
  32.         landmarks = landmarks.view(landmarks.size(0),-1).cuda()
  33.    
  34.         norm_image = transforms.Normalize([0.3809, 0.3810, 0.3810], [0.1127, 0.1129, 0.1130])
  35.         for image in images:
  36.             image = image.float()
  37.             ##image = to_tensor(image) #TypeError: pic should be PIL Image or ndarray. Got <class 'torch.Tensor'>
  38.             image = norm_image(image)
  39.        
  40.        
  41.         ###norm_landmarks = transforms.Normalize(0.4949, 0.2165)
  42.         ###landmarks = norm_landmarks(landmarks)
  43.        
  44.         for landmark in landmarks:
  45.             landmark = landmark/743
  46.        
  47.         predictions = network(images)
  48.        
  49.         # clear all the gradients before calculating them
  50.         optimizer.zero_grad()
  51.        
  52.         print('predictions are: ', predictions.float())
  53.         print('landmarks are: ', landmarks.float())
  54.         # find the loss for the current step
  55.         loss_train_step = criterion(predictions.float(), landmarks.float())
  56.        
  57.        
  58.         loss_train_step = loss_train_step.to(torch.float32)
  59.         print("loss_train_step before backward: ", loss_train_step)
  60.        
  61.         # calculate the gradients
  62.         loss_train_step.backward()
  63.        
  64.         # update the parameters
  65.         optimizer.step()
  66.        
  67.         print("loss_train_step after backward: ", loss_train_step)
  68.  
  69.        
  70.         loss_train += loss_train_step.item()
  71.        
  72.         print("loss_train: ", loss_train)
  73.         running_loss = loss_train/step
  74.         print('step: ', step)
  75.         print('running loss: ', running_loss)
  76.        
  77.         print_overwrite(step, len(train_loader), running_loss, 'train')
  78.        
  79.     network.eval()
  80.     with torch.no_grad():
  81.        
  82.         for step in range(1,len(test_loader)+1):
  83.            
  84.             batch = next(iter(train_loader))
  85.             images, landmarks = batch['image'], batch['landmarks']
  86.             images = images.permute(0,3,1,2)
  87.             images = images.cuda()
  88.             landmarks = landmarks.view(landmarks.size(0),-1).cuda()
  89.        
  90.             predictions = network(images)
  91.  
  92.             # find the loss for the current step
  93.             loss_test_step = criterion(predictions, landmarks)
  94.  
  95.             loss_test += loss_test_step.item()
  96.             running_loss = loss_test/step
  97.  
  98.             print_overwrite(step, len(test_loader), running_loss, 'Validation')
  99.    
  100.     loss_train /= len(train_loader)
  101.     loss_test /= len(test_loader)
  102.    
  103.     print('\n--------------------------------------------------')
  104.     print('Epoch: {}  Train Loss: {:.4f} Valid Loss: {:.4f}'.format(epoch, loss_train, loss_test))
  105.     print('--------------------------------------------------')
  106.    
  107.     if loss_test < loss_min:
  108.         loss_min = loss_test
  109.         torch.save(network.state_dict(), '../moth_landmarks.pth')
  110.         print("\nMinimum Valid Loss of {:.4f} at epoch {}/{}".format(loss_min, epoch, num_epochs))
  111.         print('Model Saved\n')
  112.      
  113. print('Training Complete')
  114. print("Total Elapsed Time : {} s".format(time.time()-start_time))
  115.  
  116.  
  117. --------------------------------------------------------------------
  118.  
  119. size of train loader is:  90
  120. predictions are:  tensor([[-0.2563, -0.3646,  0.3769,  0.1143,  0.0023,  0.2944, -0.1278,  0.4752],
  121.         [-0.2647, -0.3612,  0.3365,  0.1329, -0.0065,  0.3049, -0.1599,  0.4826],
  122.         [-0.2759, -0.3272,  0.3171,  0.1391, -0.0192,  0.2739, -0.1707,  0.4273],
  123.         [-0.2945, -0.3464,  0.3645,  0.1480, -0.0273,  0.2682, -0.1386,  0.4688],
  124.         [-0.2539, -0.3436,  0.3657,  0.1210,  0.0040,  0.2758, -0.1780,  0.4699],
  125.         [-0.2821, -0.3451,  0.3319,  0.1236, -0.0122,  0.2557, -0.1706,  0.4598],
  126.         [-0.2869, -0.2988,  0.3146,  0.1384, -0.0284,  0.2619, -0.1784,  0.4358],
  127.         [-0.2417, -0.3456,  0.3381,  0.1549, -0.0175,  0.2926, -0.1432,  0.4399]],
  128.        device='cuda:0', grad_fn=<AddmmBackward>)
  129. landmarks are:  tensor([[494.0148, 240.8076, 712.0000, 270.0000, 350.0000, 351.0000, 494.0000,
  130.          323.0000],
  131.         [500.1400, 249.4700, 719.0000, 245.0000, 303.0000, 287.0000, 498.0000,
  132.          338.0000],
  133.         [486.9100, 239.8900, 703.0000, 267.0000, 322.0000, 279.0000, 424.5074,
  134.          306.1910],
  135.         [494.8400, 247.7400, 712.0000, 274.0000, 315.0000, 325.0000, 458.6980,
  136.          306.5814],
  137.         [488.8000, 242.8000, 696.0000, 269.0000, 297.0000, 238.0000, 407.0000,
  138.          331.0000],
  139.         [498.1000, 245.8600, 713.0000, 246.0000, 322.0000, 143.0000, 461.0000,
  140.          322.0000],
  141.         [497.7600, 246.4500, 715.0000, 254.0000, 288.0000, 229.0000, 466.0000,
  142.          317.0000],
  143.         [496.3600, 244.8300, 716.0000, 241.0000, 303.0000, 282.0000, 477.0000,
  144.          340.0000]], device='cuda:0')
  145. loss_train_step before backward:  tensor(170825.2812, device='cuda:0', grad_fn=<MseLossBackward>)
  146. loss_train_step after backward:  tensor(170825.2812, device='cuda:0', grad_fn=<MseLossBackward>)
  147. loss_train:  170825.28125
  148. step:  1
  149. running loss:  170825.28125
  150. Train Steps: 1/90  Loss: 170825.2812 predictions are:  tensor([[ 0.0076, -0.2180,  0.6485,  0.2242,  0.1381,  0.3463,  0.0539,  0.6395],
  151.         [-0.0352, -0.2353,  0.6883,  0.2799,  0.1462,  0.3891,  0.0105,  0.6426],
  152.         [-0.0063, -0.2052,  0.7500,  0.2959,  0.1565,  0.4173,  0.0810,  0.6845],
  153.         [-0.0642, -0.2926,  0.7434,  0.2611,  0.1618,  0.4400,  0.0463,  0.6856],
  154.         [-0.0411, -0.1966,  0.7119,  0.2435,  0.1632,  0.3932,  0.0168,  0.6313],
  155.         [-0.0357, -0.2337,  0.7370,  0.2766,  0.1566,  0.3968,  0.0537,  0.6819],
  156.         [ 0.0105, -0.2863,  0.7531,  0.2688,  0.1779,  0.4589, -0.0235,  0.6847],
  157.         [-0.0319, -0.2861,  0.7344,  0.2631,  0.1316,  0.4394,  0.0566,  0.6840]],
  158.        device='cuda:0', grad_fn=<AddmmBackward>)
  159. landmarks are:  tensor([[     nan,      nan, 638.5021, 191.6575, 290.0000, 190.0000, 403.1752,
  160.          333.7941],
  161.         [502.1200, 244.9500, 668.0000, 163.0000, 365.0000, 108.0000, 473.4160,
  162.          292.6776],
  163.         [492.9800, 236.9300, 707.0000, 271.0000, 340.0000, 311.0000, 467.0000,
  164.          330.0000],
  165.         [500.0100, 246.3300, 696.0000, 223.0000, 287.0000, 298.0000, 483.0000,
  166.          310.0000],
  167.         [506.8508, 251.4398, 715.0000, 310.0000, 315.0000, 211.0000, 587.4209,
  168.          344.8954],
  169.         [498.2935, 243.9347, 681.0000, 343.0000, 360.0000, 303.0000, 482.0000,
  170.          321.0000],
  171.         [496.1100, 241.6200, 642.3539, 163.6537, 323.3445, 138.6042, 478.4196,
  172.          323.4764],
  173.         [503.1100, 241.8900, 673.4919, 326.7499, 326.0000, 301.0000, 505.0000,
  174.          307.0000]], device='cuda:0')
  175. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  176. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  177. loss_train:  nan
  178. step:  2
  179. running loss:  nan
  180. Train Steps: 2/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  181.         [nan, nan, nan, nan, nan, nan, nan, nan],
  182.         [nan, nan, nan, nan, nan, nan, nan, nan],
  183.         [nan, nan, nan, nan, nan, nan, nan, nan],
  184.         [nan, nan, nan, nan, nan, nan, nan, nan],
  185.         [nan, nan, nan, nan, nan, nan, nan, nan],
  186.         [nan, nan, nan, nan, nan, nan, nan, nan],
  187.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  188.        grad_fn=<AddmmBackward>)
  189. landmarks are:  tensor([[489.9700, 238.4500, 618.0000, 151.0000, 283.0000, 199.0000, 471.0000,
  190.          330.0000],
  191.         [495.7400, 245.4600, 704.0000, 287.0000, 283.0000, 286.0000, 476.0000,
  192.          333.0000],
  193.         [501.0700, 243.4400, 704.0000, 230.0000, 292.0000, 223.0000, 509.9838,
  194.          288.2302],
  195.         [488.7300, 240.1700, 692.0000, 293.0000, 382.0000, 292.0000, 414.0000,
  196.          341.0000],
  197.         [496.3300, 245.4400, 716.0000, 287.0000, 289.0000, 277.0000, 485.0000,
  198.          337.0000],
  199.         [484.6700, 238.7000, 663.0000, 216.0000, 272.0000, 243.0000, 442.3150,
  200.          327.6658],
  201.         [501.9900, 244.8500, 645.0000, 121.0000, 386.0000,  95.0000, 492.4471,
  202.          292.1411],
  203.         [495.4411, 242.0584, 620.5229, 140.8099, 298.6643, 175.1677, 474.4262,
  204.          295.5407]], device='cuda:0')
  205. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  206. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  207. loss_train:  nan
  208. step:  3
  209. running loss:  nan
  210. Train Steps: 3/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  211.         [nan, nan, nan, nan, nan, nan, nan, nan],
  212.         [nan, nan, nan, nan, nan, nan, nan, nan],
  213.         [nan, nan, nan, nan, nan, nan, nan, nan],
  214.         [nan, nan, nan, nan, nan, nan, nan, nan],
  215.         [nan, nan, nan, nan, nan, nan, nan, nan],
  216.         [nan, nan, nan, nan, nan, nan, nan, nan],
  217.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  218.        grad_fn=<AddmmBackward>)
  219. landmarks are:  tensor([[501.5900, 255.9900, 572.0000, 199.0000, 392.0000, 109.0000, 444.8080,
  220.          371.0059],
  221.         [488.6600, 240.6300, 643.0000, 204.0000, 296.0000, 176.0000, 452.6434,
  222.          337.0366],
  223.         [502.0600, 247.2100, 699.0000, 188.0000, 338.0000, 133.0000, 496.2461,
  224.          293.5293],
  225.         [493.1591, 244.5602, 707.0000, 247.0000, 297.0000, 333.0000, 499.0000,
  226.          321.0000],
  227.         [490.3400, 244.1100, 700.0000, 304.0000, 310.0000, 254.0000, 418.8311,
  228.          352.8785],
  229.         [502.8574, 242.0584, 655.2804, 144.8417, 340.9529, 143.5759, 509.1774,
  230.          321.3521],
  231.         [494.7200, 244.7300, 668.0000, 222.0000, 294.0000, 173.0000, 425.0000,
  232.          347.0000],
  233.         [492.0700, 247.4900, 699.0000, 265.0000, 286.0000, 227.0000, 411.0000,
  234.          329.0000]], device='cuda:0')
  235. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  236. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  237. loss_train:  nan
  238. step:  4
  239. running loss:  nan
  240. Train Steps: 4/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  241.         [nan, nan, nan, nan, nan, nan, nan, nan],
  242.         [nan, nan, nan, nan, nan, nan, nan, nan],
  243.         [nan, nan, nan, nan, nan, nan, nan, nan],
  244.         [nan, nan, nan, nan, nan, nan, nan, nan],
  245.         [nan, nan, nan, nan, nan, nan, nan, nan],
  246.         [nan, nan, nan, nan, nan, nan, nan, nan],
  247.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  248.        grad_fn=<AddmmBackward>)
  249. landmarks are:  tensor([[495.7500, 245.3800, 626.0000, 150.0000, 336.0000, 149.0000, 479.0000,
  250.          340.0000],
  251.         [498.6900, 241.4000, 711.0000, 278.0000, 318.0000, 346.0000, 512.0000,
  252.          311.0000],
  253.         [492.3034, 246.7491, 563.0000, 139.0000, 339.0000, 110.0000, 428.0000,
  254.          336.0000],
  255.         [498.8640, 237.9932, 694.0000, 324.0000, 309.0000, 271.0000, 466.0000,
  256.          312.0000],
  257.         [489.8800, 245.0100, 556.3521, 184.1524, 292.0000, 165.0000, 413.0584,
  258.          329.1087],
  259.         [484.6600, 239.1800, 665.8854, 277.5496, 307.0000, 299.0000, 411.7228,
  260.          327.9374],
  261.         [490.1900, 243.9500, 684.0000, 334.0000, 372.9783, 308.4714, 405.5792,
  262.          324.7162],
  263.         [482.0700, 238.7200, 684.0000, 254.0000, 289.0000, 314.0000, 446.5888,
  264.          297.9915]], device='cuda:0')
  265. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  266. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  267. loss_train:  nan
  268. step:  5
  269. running loss:  nan
  270.  
  271. Train Steps: 5/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  272.         [nan, nan, nan, nan, nan, nan, nan, nan],
  273.         [nan, nan, nan, nan, nan, nan, nan, nan],
  274.         [nan, nan, nan, nan, nan, nan, nan, nan],
  275.         [nan, nan, nan, nan, nan, nan, nan, nan],
  276.         [nan, nan, nan, nan, nan, nan, nan, nan],
  277.         [nan, nan, nan, nan, nan, nan, nan, nan],
  278.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  279.        grad_fn=<AddmmBackward>)
  280. landmarks are:  tensor([[491.6300, 240.4200, 702.0000, 272.0000, 365.0000, 332.0000, 487.0000,
  281.          332.0000],
  282.         [500.1200, 249.7500, 693.0000, 268.0000, 290.0000, 214.0000, 483.0000,
  283.          332.0000],
  284.         [488.1700, 239.9200, 700.0000, 308.0000, 306.0000, 285.0000, 451.0000,
  285.          305.0000],
  286.         [489.8800, 245.0100, 556.3521, 184.1524, 292.0000, 165.0000, 413.0584,
  287.          329.1087],
  288.         [500.8400, 246.3800, 642.0000, 155.0000, 364.0000, 112.0000, 502.5046,
  289.          292.1347],
  290.         [496.0000, 239.8700, 708.0000, 237.0000, 280.0000, 279.0000, 491.0000,
  291.          324.0000],
  292.         [495.8700, 247.7900, 701.0000, 247.0000, 292.0000, 294.0000, 456.5611,
  293.          306.1910],
  294.         [496.1200, 249.0500, 687.0000, 328.0000, 296.0000, 237.0000, 451.0000,
  295.          356.0000]], device='cuda:0')
  296. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  297. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  298. loss_train:  nan
  299. step:  6
  300. running loss:  nan
  301. Train Steps: 6/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  302.         [nan, nan, nan, nan, nan, nan, nan, nan],
  303.         [nan, nan, nan, nan, nan, nan, nan, nan],
  304.         [nan, nan, nan, nan, nan, nan, nan, nan],
  305.         [nan, nan, nan, nan, nan, nan, nan, nan],
  306.         [nan, nan, nan, nan, nan, nan, nan, nan],
  307.         [nan, nan, nan, nan, nan, nan, nan, nan],
  308.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  309.        grad_fn=<AddmmBackward>)
  310. landmarks are:  tensor([[489.1000, 241.0600, 577.0000, 118.0000, 301.0000, 162.0000, 470.0000,
  311.          332.0000],
  312.         [498.1200, 244.9800, 715.0000, 288.0000, 304.0000, 177.0000, 459.0000,
  313.          321.0000],
  314.         [491.4000, 238.9100, 692.0000, 293.0000, 313.0000, 259.0000, 425.2197,
  315.          321.0281],
  316.         [490.9100, 237.3000, 672.0000, 196.0000, 280.0000, 252.0000, 469.0000,
  317.          328.0000],
  318.         [491.4477, 242.0584, 704.0000, 290.0000, 361.0000, 322.0000, 423.0828,
  319.          305.8005],
  320.         [507.4213, 245.8110, 743.0000, 262.0000, 345.0000, 216.0000, 579.8230,
  321.          350.4485],
  322.         [     nan,      nan, 694.0000, 170.0000, 428.0000, 119.0000, 534.2356,
  323.          337.2599],
  324.         [499.9900, 243.2300, 701.6359, 283.3869, 373.0000, 322.0000, 493.0000,
  325.          326.0000]], device='cuda:0')
  326. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  327. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  328. loss_train:  nan
  329. step:  7
  330. running loss:  nan
  331. Train Steps: 7/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  332.         [nan, nan, nan, nan, nan, nan, nan, nan],
  333.         [nan, nan, nan, nan, nan, nan, nan, nan],
  334.         [nan, nan, nan, nan, nan, nan, nan, nan],
  335.         [nan, nan, nan, nan, nan, nan, nan, nan],
  336.         [nan, nan, nan, nan, nan, nan, nan, nan],
  337.         [nan, nan, nan, nan, nan, nan, nan, nan],
  338.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  339.        grad_fn=<AddmmBackward>)
  340. landmarks are:  tensor([[509.7032, 245.4983, 667.0000, 351.0000, 316.0000, 307.0000, 524.7382,
  341.          315.7417],
  342.         [495.9600, 243.5600, 696.0000, 294.0000, 333.0000, 300.0000, 493.0000,
  343.          328.0000],
  344.         [492.4400, 247.4700, 708.0000, 290.0000, 364.0000, 349.0000, 461.1910,
  345.          305.0196],
  346.         [497.7500, 250.2900, 708.0000, 313.0000, 299.0000, 276.0000, 456.0000,
  347.          338.0000],
  348.         [495.7000, 245.4200, 676.0000, 234.0000, 286.0000, 236.0000, 478.0000,
  349.          335.0000],
  350.         [509.1327, 248.6254, 690.0000, 185.0000, 393.0000, 120.0000, 515.8740,
  351.          316.4358],
  352.         [483.1600, 240.1100, 587.0000, 136.0000, 318.0000, 126.0000, 418.4528,
  353.          286.6684],
  354.         [499.1492, 246.4364, 652.9544, 165.8065, 290.0000, 216.0000, 479.0000,
  355.          342.0000]], device='cuda:0')
  356. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  357. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  358. loss_train:  nan
  359. step:  8
  360. running loss:  nan
  361. Train Steps: 8/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  362.         [nan, nan, nan, nan, nan, nan, nan, nan],
  363.         [nan, nan, nan, nan, nan, nan, nan, nan],
  364.         [nan, nan, nan, nan, nan, nan, nan, nan],
  365.         [nan, nan, nan, nan, nan, nan, nan, nan],
  366.         [nan, nan, nan, nan, nan, nan, nan, nan],
  367.         [nan, nan, nan, nan, nan, nan, nan, nan],
  368.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  369.        grad_fn=<AddmmBackward>)
  370. landmarks are:  tensor([[496.1000, 244.9000, 706.1387, 222.8866, 306.0016, 162.7361, 467.5804,
  371.          324.7273],
  372.         [494.3001, 247.0619, 582.2141, 254.2003, 287.0000, 204.0000, 454.0000,
  373.          355.0000],
  374.         [486.0700, 237.4800, 696.0000, 280.0000, 363.6628, 302.7423, 418.4528,
  375.          297.6010],
  376.         [     nan,      nan, 715.0000, 171.0000, 373.0000, 187.0000, 592.4862,
  377.          331.7068],
  378.         [500.0049, 240.4949, 716.0000, 251.0000, 284.0000, 263.0000, 508.9154,
  379.          295.6488],
  380.         [489.1000, 241.0600, 577.0000, 118.0000, 301.0000, 162.0000, 470.0000,
  381.          332.0000],
  382.         [     nan,      nan, 604.2729, 163.3048, 310.0000, 153.0000, 410.0000,
  383.          338.0000],
  384.         [488.0247, 244.2474, 608.0761, 206.6678, 272.0000, 247.0000, 450.0000,
  385.          337.0000]], device='cuda:0')
  386. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  387. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  388. loss_train:  nan
  389. step:  9
  390. running loss:  nan
  391. Train Steps: 9/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  392.         [nan, nan, nan, nan, nan, nan, nan, nan],
  393.         [nan, nan, nan, nan, nan, nan, nan, nan],
  394.         [nan, nan, nan, nan, nan, nan, nan, nan],
  395.         [nan, nan, nan, nan, nan, nan, nan, nan],
  396.         [nan, nan, nan, nan, nan, nan, nan, nan],
  397.         [nan, nan, nan, nan, nan, nan, nan, nan],
  398.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  399.        grad_fn=<AddmmBackward>)
  400. landmarks are:  tensor([[490.2100, 246.9800, 676.0000, 275.0000, 294.0000, 292.0000, 426.9483,
  401.          326.7660],
  402.         [505.9951, 243.9347, 675.0000, 321.0000, 314.0000, 316.0000, 569.0593,
  403.          347.6719],
  404.         [494.5853, 238.3059, 697.6390, 331.6491, 291.0425, 214.9496, 455.6557,
  405.          323.6706],
  406.         [492.0181, 236.1169, 695.7136, 309.4855, 371.7718, 319.7672, 483.3020,
  407.          309.1750],
  408.         [490.1700, 246.8700, 573.0000, 173.0000, 290.0000, 177.0000, 426.1470,
  409.          329.6944],
  410.         [496.2000, 243.9500, 671.8087, 158.8574, 314.0060, 157.6172, 467.5804,
  411.          307.4238],
  412.         [496.8673, 236.1169, 690.9001, 290.4881, 353.3458, 307.5619, 506.4407,
  413.          316.0934],
  414.         [500.2902, 239.5567, 719.0000, 286.0000, 319.0000, 331.0000, 556.3961,
  415.          317.1299]], device='cuda:0')
  416. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  417. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  418. loss_train:  nan
  419. step:  10
  420. running loss:  nan
  421. Train Steps: 10/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  422.         [nan, nan, nan, nan, nan, nan, nan, nan],
  423.         [nan, nan, nan, nan, nan, nan, nan, nan],
  424.         [nan, nan, nan, nan, nan, nan, nan, nan],
  425.         [nan, nan, nan, nan, nan, nan, nan, nan],
  426.         [nan, nan, nan, nan, nan, nan, nan, nan],
  427.         [nan, nan, nan, nan, nan, nan, nan, nan],
  428.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  429.        grad_fn=<AddmmBackward>)
  430. landmarks are:  tensor([[490.1000, 242.3100, 659.0000, 238.0000, 290.0000, 216.0000, 471.5194,
  431.          368.2727],
  432.         [495.9100, 243.6200, 711.0000, 280.0000, 304.0000, 303.0000, 495.0000,
  433.          326.0000],
  434.         [499.6700, 241.6600, 699.0000, 292.0000, 327.0000, 340.0000, 509.0000,
  435.          312.0000],
  436.         [493.2500, 240.4900, 685.0000, 340.0000, 351.0000, 296.0000, 446.0000,
  437.          334.0000],
  438.         [487.2800, 239.8400, 665.1248, 260.0376, 303.0000, 273.0000, 417.0651,
  439.          339.3581],
  440.         [507.7065, 245.1856, 635.0000, 330.0000, 317.0000, 292.0000, 587.4209,
  441.          342.1188],
  442.         [497.5500, 246.8200, 654.0000, 169.0000, 314.0000, 167.0000, 472.0000,
  443.          321.0000],
  444.         [494.5853, 238.3059, 684.1613, 354.8681, 294.6041, 250.8712, 455.0547,
  445.          322.6822]], device='cuda:0')
  446. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  447.  
  448. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  449. loss_train:  nan
  450. step:  11
  451. running loss:  nan
  452. Train Steps: 11/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  453.         [nan, nan, nan, nan, nan, nan, nan, nan],
  454.         [nan, nan, nan, nan, nan, nan, nan, nan],
  455.         [nan, nan, nan, nan, nan, nan, nan, nan],
  456.         [nan, nan, nan, nan, nan, nan, nan, nan],
  457.         [nan, nan, nan, nan, nan, nan, nan, nan],
  458.         [nan, nan, nan, nan, nan, nan, nan, nan],
  459.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  460.        grad_fn=<AddmmBackward>)
  461. landmarks are:  tensor([[504.7900, 241.0400, 685.0000, 348.0000, 295.0000, 285.0000, 506.0662,
  462.          300.3342],
  463.         [486.9100, 239.8900, 703.0000, 267.0000, 322.0000, 279.0000, 424.5074,
  464.          306.1910],
  465.         [493.1591, 238.3059, 625.4368, 202.8892, 287.8980, 203.2175, 470.3803,
  466.          309.1750],
  467.         [482.3900, 238.1600, 639.0000, 235.0000, 276.0000, 220.0000, 421.3020,
  468.          281.9830],
  469.         [492.9800, 236.9300, 707.0000, 271.0000, 340.0000, 311.0000, 467.0000,
  470.          330.0000],
  471.         [494.2245, 240.7296, 712.0364, 269.7266, 311.2985, 337.0000, 477.7910,
  472.          292.7526],
  473.         [490.8772, 241.7457, 661.0000, 201.0000, 290.0000, 184.0000, 454.0000,
  474.          310.0000],
  475.         [495.9600, 245.1500, 673.1207, 178.4619, 329.3477, 136.4104, 469.4820,
  476.          323.4764]], device='cuda:0')
  477. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  478. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  479. loss_train:  nan
  480. step:  12
  481. running loss:  nan
  482. Train Steps: 12/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  483.         [nan, nan, nan, nan, nan, nan, nan, nan],
  484.         [nan, nan, nan, nan, nan, nan, nan, nan],
  485.         [nan, nan, nan, nan, nan, nan, nan, nan],
  486.         [nan, nan, nan, nan, nan, nan, nan, nan],
  487.         [nan, nan, nan, nan, nan, nan, nan, nan],
  488.         [nan, nan, nan, nan, nan, nan, nan, nan],
  489.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  490.        grad_fn=<AddmmBackward>)
  491. landmarks are:  tensor([[501.9900, 244.8500, 645.0000, 121.0000, 386.0000,  95.0000, 492.4471,
  492.          292.1411],
  493.         [500.0000, 248.7500, 707.0000, 236.0000, 287.0000, 257.0000, 493.0000,
  494.          322.0000],
  495.         [486.9100, 239.8900, 703.0000, 267.0000, 322.0000, 279.0000, 424.5074,
  496.          306.1910],
  497.         [492.2300, 247.1400, 677.0000, 230.0000, 288.0000, 192.0000, 408.5175,
  498.          333.7941],
  499.         [500.9900, 249.7600, 708.0000, 311.0000, 301.0000, 249.0000, 482.0000,
  500.          330.0000],
  501.         [497.2900, 250.0000, 687.0000, 335.0000, 318.0000, 310.0000, 462.0000,
  502.          340.0000],
  503.         [501.8700, 246.5800, 712.0000, 229.0000, 335.0000, 130.0000, 468.6673,
  504.          290.0746],
  505.         [493.5600, 243.1200, 699.3539, 286.7225, 343.0000, 295.0000, 461.0000,
  506.          337.0000]], device='cuda:0')
  507. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  508. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  509. loss_train:  nan
  510. step:  13
  511. running loss:  nan
  512. Train Steps: 13/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  513.         [nan, nan, nan, nan, nan, nan, nan, nan],
  514.         [nan, nan, nan, nan, nan, nan, nan, nan],
  515.         [nan, nan, nan, nan, nan, nan, nan, nan],
  516.         [nan, nan, nan, nan, nan, nan, nan, nan],
  517.         [nan, nan, nan, nan, nan, nan, nan, nan],
  518.         [nan, nan, nan, nan, nan, nan, nan, nan],
  519.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  520.        grad_fn=<AddmmBackward>)
  521. landmarks are:  tensor([[494.0900, 241.0100, 703.0000, 306.0000, 326.0000, 315.0000, 473.0000,
  522.          302.0000],
  523.         [492.8739, 241.4330, 707.0000, 275.0000, 295.0000, 224.0000, 424.8636,
  524.          320.6377],
  525.         [483.3700, 239.4200, 546.4636, 172.4778, 280.0000, 188.0000, 411.4557,
  526.          330.5729],
  527.         [486.7600, 240.2900, 672.0000, 259.0000, 301.0000, 285.0000, 438.0412,
  528.          303.4578],
  529.         [503.7131, 240.4949, 732.0000, 259.0000, 341.0000, 183.0000, 580.4562,
  530.          324.7654],
  531.         [502.4100, 246.0400, 724.0000, 272.0000, 302.0000, 193.0000, 507.0098,
  532.          294.9176],
  533.         [490.5919, 243.9347, 580.6928, 144.1250, 287.0000, 198.0000, 480.0000,
  534.          336.0000],
  535.         [494.9600, 246.2100, 570.0437, 124.1114, 316.0000, 151.0000, 473.0000,
  536.          341.0000]], device='cuda:0')
  537. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  538. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  539. loss_train:  nan
  540. step:  14
  541. running loss:  nan
  542. Train Steps: 14/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  543.         [nan, nan, nan, nan, nan, nan, nan, nan],
  544.         [nan, nan, nan, nan, nan, nan, nan, nan],
  545.         [nan, nan, nan, nan, nan, nan, nan, nan],
  546.         [nan, nan, nan, nan, nan, nan, nan, nan],
  547.         [nan, nan, nan, nan, nan, nan, nan, nan],
  548.         [nan, nan, nan, nan, nan, nan, nan, nan],
  549.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  550.        grad_fn=<AddmmBackward>)
  551. landmarks are:  tensor([[499.9900, 243.2300, 701.6359, 283.3869, 373.0000, 322.0000, 493.0000,
  552.          326.0000],
  553.         [495.9000, 240.8800, 697.2715, 313.6593, 306.6687, 286.3209, 470.6230,
  554.          308.8831],
  555.         [507.1360, 247.3746, 691.0000, 322.0000, 326.0000, 328.0000, 601.3504,
  556.          326.1537],
  557.         [493.1591, 246.1237, 708.0000, 292.0000, 337.0000, 359.0000, 448.7257,
  558.          302.2865],
  559.         [495.7263, 247.0619, 695.0000, 331.0000, 323.0000, 314.0000, 470.0000,
  560.          336.0000],
  561.         [495.8200, 244.5800, 635.6002, 147.2001, 358.0301, 112.2785, 471.7639,
  562.          320.9747],
  563.         [490.3067, 235.8042, 701.4897, 306.3193, 331.7047, 338.5089, 479.9964,
  564.          304.8923],
  565.         [494.0100, 245.5700, 704.0000, 266.0000, 326.0000, 262.0000, 410.2613,
  566.          294.0870]], device='cuda:0')
  567. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  568. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  569. loss_train:  nan
  570. step:  15
  571. running loss:  nan
  572. Train Steps: 15/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  573.         [nan, nan, nan, nan, nan, nan, nan, nan],
  574.         [nan, nan, nan, nan, nan, nan, nan, nan],
  575.         [nan, nan, nan, nan, nan, nan, nan, nan],
  576.         [nan, nan, nan, nan, nan, nan, nan, nan],
  577.         [nan, nan, nan, nan, nan, nan, nan, nan],
  578.         [nan, nan, nan, nan, nan, nan, nan, nan],
  579.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  580.        grad_fn=<AddmmBackward>)
  581. landmarks are:  tensor([[489.6200, 240.8100, 549.0000, 169.0000, 296.0000, 167.0000, 441.0000,
  582.          340.0000],
  583.         [494.4200, 243.8400, 576.0000, 148.0000, 342.0000, 142.0000, 477.0000,
  584.          373.0000],
  585.         [498.1200, 244.9800, 715.0000, 288.0000, 304.0000, 177.0000, 459.0000,
  586.          321.0000],
  587.         [506.2803, 251.7526, 739.0000, 275.0000, 341.0000, 176.0000, 587.4209,
  588.          344.8954],
  589.         [495.8600, 244.5100, 692.0000, 337.0000, 332.0000, 262.0000, 436.0000,
  590.          339.0000],
  591.         [491.0200, 241.2100, 704.0000, 283.0000, 350.0000, 283.0000, 440.1781,
  592.          336.6462],
  593.         [498.0700, 246.1700, 686.0000, 207.0000, 351.0000, 124.0000, 463.0000,
  594.          323.0000],
  595.         [496.2100, 244.5800, 688.8793, 172.7032, 324.0115, 153.2296, 472.5629,
  596.          329.7797]], device='cuda:0')
  597. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  598. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  599. loss_train:  nan
  600. step:  16
  601. running loss:  nan
  602. Train Steps: 16/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  603.         [nan, nan, nan, nan, nan, nan, nan, nan],
  604.         [nan, nan, nan, nan, nan, nan, nan, nan],
  605.         [nan, nan, nan, nan, nan, nan, nan, nan],
  606.         [nan, nan, nan, nan, nan, nan, nan, nan],
  607.         [nan, nan, nan, nan, nan, nan, nan, nan],
  608.         [nan, nan, nan, nan, nan, nan, nan, nan],
  609.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  610.        grad_fn=<AddmmBackward>)
  611. landmarks are:  tensor([[501.9800, 249.4200, 667.0000, 348.0000, 301.0000, 252.0000, 443.7396,
  612.          367.4918],
  613.         [     nan,      nan, 669.0000, 199.0000, 285.0000, 202.0000, 426.2881,
  614.          308.5337],
  615.         [503.9984, 246.1237, 727.0000, 266.0000, 327.0000, 184.0000, 545.6324,
  616.          332.4009],
  617.         [484.8000, 235.4200, 676.0000, 343.0000, 336.0000, 313.0000, 420.2336,
  618.          285.1066],
  619.         [     nan,      nan, 611.8794, 163.3048, 317.0000, 131.0000, 404.7779,
  620.          325.3018],
  621.         [489.3200, 241.1600, 683.0000, 244.0000, 281.0000, 215.0000, 453.0000,
  622.          308.0000],
  623.         [507.1360, 246.1237, 727.0000, 286.0000, 314.0000, 317.0000, 600.7172,
  624.          323.3772],
  625.         [497.1100, 246.9600, 620.0000, 139.0000, 359.0000, 113.0000, 496.0000,
  626.          324.0000]], device='cuda:0')
  627. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  628.  
  629. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  630. loss_train:  nan
  631. step:  17
  632. running loss:  nan
  633. Train Steps: 17/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  634.         [nan, nan, nan, nan, nan, nan, nan, nan],
  635.         [nan, nan, nan, nan, nan, nan, nan, nan],
  636.         [nan, nan, nan, nan, nan, nan, nan, nan],
  637.         [nan, nan, nan, nan, nan, nan, nan, nan],
  638.         [nan, nan, nan, nan, nan, nan, nan, nan],
  639.         [nan, nan, nan, nan, nan, nan, nan, nan],
  640.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  641.        grad_fn=<AddmmBackward>)
  642. landmarks are:  tensor([[502.8574, 245.8110, 648.5415, 144.8417, 354.0000, 149.0000, 539.6033,
  643.          323.0817],
  644.         [492.8300, 245.1400, 702.0000, 239.0000, 300.0000, 227.0000, 410.2613,
  645.          292.9156],
  646.         [489.1657, 239.8694, 565.0000, 143.0000, 323.0000, 117.0000, 425.5759,
  647.          299.5533],
  648.         [494.8706, 240.8076, 655.2804, 191.2797, 342.3284, 123.5840, 461.3652,
  649.          324.6589],
  650.         [487.7600, 239.2700, 692.0000, 315.0000, 337.0000, 312.0000, 454.0000,
  651.          303.0000],
  652.         [483.3700, 239.4200, 546.4636, 172.4778, 280.0000, 188.0000, 411.4557,
  653.          330.5729],
  654.         [490.2000, 245.0600, 699.0000, 281.0000, 289.0000, 222.0000, 396.7645,
  655.          323.8376],
  656.         [494.0700, 241.7800, 708.0000, 291.0000, 295.0000, 243.0000, 425.0000,
  657.          347.0000]], device='cuda:0')
  658. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  659. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  660. loss_train:  nan
  661. step:  18
  662. running loss:  nan
  663. Train Steps: 18/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  664.         [nan, nan, nan, nan, nan, nan, nan, nan],
  665.         [nan, nan, nan, nan, nan, nan, nan, nan],
  666.         [nan, nan, nan, nan, nan, nan, nan, nan],
  667.         [nan, nan, nan, nan, nan, nan, nan, nan],
  668.         [nan, nan, nan, nan, nan, nan, nan, nan],
  669.         [nan, nan, nan, nan, nan, nan, nan, nan],
  670.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  671.        grad_fn=<AddmmBackward>)
  672. landmarks are:  tensor([[502.0200, 242.8900, 679.0000, 173.0000, 357.0000, 122.0000, 505.7100,
  673.          309.3146],
  674.         [501.7700, 247.2200, 723.0000, 247.0000, 298.0000, 192.0000, 494.0000,
  675.          315.0000],
  676.         [501.1800, 255.0400, 569.0000, 213.0000, 350.0000, 127.0000, 446.9449,
  677.          367.1014],
  678.         [491.6300, 240.4700, 700.0000, 323.0000, 318.0000, 279.0000, 445.0000,
  679.          332.0000],
  680.         [495.8300, 246.2900, 636.0000, 196.0000, 294.0000, 226.0000, 483.0000,
  681.          370.0000],
  682.         [506.5656, 247.0619, 739.0000, 256.0000, 321.0000, 284.0000, 601.9836,
  683.          326.1537],
  684.         [503.4279, 238.9313, 696.0000, 318.0000, 301.0000, 283.0000, 563.9941,
  685.          317.8241],
  686.         [496.1100, 241.6200, 642.3539, 163.6537, 323.3445, 138.6042, 478.4196,
  687.          323.4764]], device='cuda:0')
  688. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  689. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  690. loss_train:  nan
  691. step:  19
  692. running loss:  nan
  693. Train Steps: 19/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  694.         [nan, nan, nan, nan, nan, nan, nan, nan],
  695.         [nan, nan, nan, nan, nan, nan, nan, nan],
  696.         [nan, nan, nan, nan, nan, nan, nan, nan],
  697.         [nan, nan, nan, nan, nan, nan, nan, nan],
  698.         [nan, nan, nan, nan, nan, nan, nan, nan],
  699.         [nan, nan, nan, nan, nan, nan, nan, nan],
  700.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  701.        grad_fn=<AddmmBackward>)
  702. landmarks are:  tensor([[500.0049, 240.4949, 716.0000, 251.0000, 284.0000, 263.0000, 508.9154,
  703.          295.6488],
  704.         [496.3500, 242.9600, 638.0000, 162.0000, 315.0000, 154.0000, 456.0000,
  705.          311.0000],
  706.         [489.3200, 241.1600, 683.0000, 244.0000, 281.0000, 215.0000, 453.0000,
  707.          308.0000],
  708.         [497.2200, 247.1100, 615.0000, 138.0000, 336.0000, 137.0000, 474.0000,
  709.          319.0000],
  710.         [504.5688, 239.8694, 680.0000, 314.0000, 308.0000, 303.0000, 595.6520,
  711.          319.2123],
  712.         [501.0800, 242.3100, 720.0000, 264.0000, 290.0000, 280.0000, 513.9015,
  713.          288.2302],
  714.         [501.5100, 244.3800, 712.0000, 296.0000, 290.0000, 251.0000, 503.0000,
  715.          309.0000],
  716.         [501.8900, 245.2000, 670.0000, 342.0000, 322.0000, 288.0000, 456.0000,
  717.          367.0000]], device='cuda:0')
  718. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  719. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  720. loss_train:  nan
  721. step:  20
  722. running loss:  nan
  723. Train Steps: 20/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  724.         [nan, nan, nan, nan, nan, nan, nan, nan],
  725.         [nan, nan, nan, nan, nan, nan, nan, nan],
  726.         [nan, nan, nan, nan, nan, nan, nan, nan],
  727.         [nan, nan, nan, nan, nan, nan, nan, nan],
  728.         [nan, nan, nan, nan, nan, nan, nan, nan],
  729.         [nan, nan, nan, nan, nan, nan, nan, nan],
  730.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  731.        grad_fn=<AddmmBackward>)
  732. landmarks are:  tensor([[490.3600, 243.8000, 699.0000, 315.0000, 345.0000, 284.0000, 418.4304,
  733.          352.4393],
  734.         [500.9300, 243.1400, 711.0000, 282.0000, 294.0000, 307.0000, 508.0000,
  735.          314.0000],
  736.         [491.4477, 242.0584, 704.0000, 290.0000, 361.0000, 322.0000, 423.0828,
  737.          305.8005],
  738.         [508.8475, 246.1237, 692.0000, 179.0000, 391.0000, 120.0000, 536.1351,
  739.          327.5420],
  740.         [499.9900, 243.2300, 701.6359, 283.3869, 373.0000, 322.0000, 493.0000,
  741.          326.0000],
  742.         [494.0900, 241.0100, 703.0000, 306.0000, 326.0000, 315.0000, 473.0000,
  743.          302.0000],
  744.         [508.5623, 249.5636, 703.0000, 335.0000, 291.0000, 266.0000, 519.0398,
  745.          317.8241],
  746.         [496.3700, 240.9300, 668.0000, 163.0000, 319.0000, 153.0000, 463.0000,
  747.          308.0000]], device='cuda:0')
  748. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  749. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  750. loss_train:  nan
  751. step:  21
  752. running loss:  nan
  753. Train Steps: 21/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  754.         [nan, nan, nan, nan, nan, nan, nan, nan],
  755.         [nan, nan, nan, nan, nan, nan, nan, nan],
  756.         [nan, nan, nan, nan, nan, nan, nan, nan],
  757.         [nan, nan, nan, nan, nan, nan, nan, nan],
  758.         [nan, nan, nan, nan, nan, nan, nan, nan],
  759.         [nan, nan, nan, nan, nan, nan, nan, nan],
  760.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  761.        grad_fn=<AddmmBackward>)
  762. landmarks are:  tensor([[490.3600, 243.8000, 699.0000, 315.0000, 345.0000, 284.0000, 418.4304,
  763.          352.4393],
  764.         [491.6200, 238.9700, 696.0000, 301.0000, 352.0000, 288.0000, 430.0000,
  765.          345.0000],
  766.         [     nan,      nan, 575.3683, 140.7894, 323.0000, 123.0000, 411.0000,
  767.          339.0000],
  768.         [501.7164, 244.8729, 726.5198, 293.6544, 296.0000, 257.0000, 532.8420,
  769.          316.4105],
  770.         [487.3300, 240.0800, 691.0000, 292.0000, 343.0000, 322.0000, 438.7535,
  771.          303.8483],
  772.         [500.5000, 251.9400, 691.0000, 348.0000, 319.0000, 263.0000, 448.0000,
  773.          357.0000],
  774.         [498.8640, 237.9932, 694.0000, 324.0000, 309.0000, 271.0000, 466.0000,
  775.          312.0000],
  776.         [495.9100, 243.6200, 711.0000, 280.0000, 304.0000, 303.0000, 495.0000,
  777.          326.0000]], device='cuda:0')
  778. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  779. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  780. loss_train:  nan
  781. step:  22
  782. running loss:  nan
  783. Train Steps: 22/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  784.         [nan, nan, nan, nan, nan, nan, nan, nan],
  785.         [nan, nan, nan, nan, nan, nan, nan, nan],
  786.         [nan, nan, nan, nan, nan, nan, nan, nan],
  787.         [nan, nan, nan, nan, nan, nan, nan, nan],
  788.         [nan, nan, nan, nan, nan, nan, nan, nan],
  789.         [nan, nan, nan, nan, nan, nan, nan, nan],
  790.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  791.        grad_fn=<AddmmBackward>)
  792. landmarks are:  tensor([[502.2869, 240.4949, 688.0000, 293.0000, 346.0000, 317.0000, 560.8282,
  793.          322.6830],
  794.         [490.1900, 247.1700, 692.0000, 318.0000, 361.0000, 315.0000, 420.8047,
  795.          327.6445],
  796.         [505.1393, 242.9966, 658.1664, 325.8310, 332.0000, 331.0000, 569.6925,
  797.          341.4247],
  798.         [497.0900, 242.4000, 707.0000, 210.0000, 288.0000, 311.0000, 509.0000,
  799.          312.0000],
  800.         [501.2900, 243.9900, 687.0000, 172.0000, 344.0000, 171.0000, 506.0000,
  801.          316.0000],
  802.         [488.9500, 241.7100, 691.0000, 288.0000, 390.0000, 305.0000, 461.0000,
  803.          334.0000],
  804.         [495.1000, 234.6500, 704.0000, 295.0000, 297.0000, 288.0000, 483.0000,
  805.          290.0000],
  806.         [493.4800, 246.5000, 545.7030, 163.3048, 306.0000, 153.0000, 444.0000,
  807.          343.0000]], device='cuda:0')
  808. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  809.  
  810. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  811. loss_train:  nan
  812. step:  23
  813. running loss:  nan
  814. Train Steps: 23/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  815.         [nan, nan, nan, nan, nan, nan, nan, nan],
  816.         [nan, nan, nan, nan, nan, nan, nan, nan],
  817.         [nan, nan, nan, nan, nan, nan, nan, nan],
  818.         [nan, nan, nan, nan, nan, nan, nan, nan],
  819.         [nan, nan, nan, nan, nan, nan, nan, nan],
  820.         [nan, nan, nan, nan, nan, nan, nan, nan],
  821.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  822.        grad_fn=<AddmmBackward>)
  823. landmarks are:  tensor([[501.8300, 248.5600, 700.0000, 342.0000, 319.0000, 283.0000, 481.0000,
  824.          328.0000],
  825.         [     nan,      nan, 567.7618, 140.7894, 340.0000, 111.0000, 414.0000,
  826.          335.0000],
  827.         [487.7600, 239.2700, 692.0000, 315.0000, 337.0000, 312.0000, 454.0000,
  828.          303.0000],
  829.         [491.4477, 242.0584, 704.0000, 290.0000, 361.0000, 322.0000, 423.0828,
  830.          305.8005],
  831.         [497.5400, 245.8000, 699.0000, 204.0000, 285.0000, 247.0000, 478.0000,
  832.          341.0000],
  833.         [497.4900, 236.0200, 695.0000, 316.0000, 345.0000, 298.0000, 479.0000,
  834.          299.0000],
  835.         [494.5853, 238.9313, 603.2948, 142.7309, 316.7463, 167.4912, 486.3070,
  836.          323.3411],
  837.         [501.1200, 242.0800, 711.0000, 293.0000, 324.0000, 313.0000, 508.9154,
  838.          287.4493]], device='cuda:0')
  839. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  840. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  841. loss_train:  nan
  842. step:  24
  843. running loss:  nan
  844. Train Steps: 24/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  845.         [nan, nan, nan, nan, nan, nan, nan, nan],
  846.         [nan, nan, nan, nan, nan, nan, nan, nan],
  847.         [nan, nan, nan, nan, nan, nan, nan, nan],
  848.         [nan, nan, nan, nan, nan, nan, nan, nan],
  849.         [nan, nan, nan, nan, nan, nan, nan, nan],
  850.         [nan, nan, nan, nan, nan, nan, nan, nan],
  851.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  852.        grad_fn=<AddmmBackward>)
  853. landmarks are:  tensor([[486.9300, 238.5500, 667.0000, 232.0000, 297.0000, 187.0000, 475.0000,
  854.          318.0000],
  855.         [494.5853, 238.3059, 697.6390, 331.6491, 291.0425, 214.9496, 455.6557,
  856.          323.6706],
  857.         [492.8300, 245.1400, 702.0000, 239.0000, 300.0000, 227.0000, 410.2613,
  858.          292.9156],
  859.         [507.1360, 244.8729, 674.0000, 325.0000, 308.0000, 290.0000, 586.7878,
  860.          345.5895],
  861.         [487.6300, 240.1400, 682.6197, 310.0718, 402.5010, 305.6570, 410.0000,
  862.          326.0000],
  863.         [502.7100, 247.6400, 683.0000, 352.0000, 349.0000, 305.0000, 483.0000,
  864.          326.0000],
  865.         [500.9900, 249.7600, 708.0000, 311.0000, 301.0000, 249.0000, 482.0000,
  866.          330.0000],
  867.         [494.8706, 240.8076, 655.2804, 191.2797, 342.3284, 123.5840, 461.3652,
  868.          324.6589]], device='cuda:0')
  869. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  870. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  871. loss_train:  nan
  872. step:  25
  873. running loss:  nan
  874. Train Steps: 25/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  875.         [nan, nan, nan, nan, nan, nan, nan, nan],
  876.         [nan, nan, nan, nan, nan, nan, nan, nan],
  877.         [nan, nan, nan, nan, nan, nan, nan, nan],
  878.         [nan, nan, nan, nan, nan, nan, nan, nan],
  879.         [nan, nan, nan, nan, nan, nan, nan, nan],
  880.         [nan, nan, nan, nan, nan, nan, nan, nan],
  881.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  882.        grad_fn=<AddmmBackward>)
  883. landmarks are:  tensor([[500.0049, 240.4949, 716.0000, 251.0000, 284.0000, 263.0000, 508.9154,
  884.          295.6488],
  885.         [489.9400, 241.8100, 692.0000, 292.0000, 399.9338, 306.3606, 410.9215,
  886.          346.3862],
  887.         [503.1100, 241.8900, 673.4919, 326.7499, 326.0000, 301.0000, 505.0000,
  888.          307.0000],
  889.         [494.5853, 238.3059, 697.6390, 331.6491, 291.0425, 214.9496, 455.6557,
  890.          323.6706],
  891.         [499.7000, 245.3900, 557.1127, 121.6097, 314.0000, 161.0000, 487.0000,
  892.          335.0000],
  893.         [496.7200, 235.1800, 692.0000, 322.0000, 352.0000, 304.0000, 482.0000,
  894.          297.0000],
  895.         [496.1400, 243.1500, 691.0000, 317.0000, 363.6628, 306.4949, 472.0000,
  896.          308.0000],
  897.         [486.6900, 238.8800, 687.0000, 314.0000, 368.0000, 322.0000, 454.4241,
  898.          300.3342]], device='cuda:0')
  899. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  900. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  901. loss_train:  nan
  902. step:  26
  903. running loss:  nan
  904. Train Steps: 26/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  905.         [nan, nan, nan, nan, nan, nan, nan, nan],
  906.         [nan, nan, nan, nan, nan, nan, nan, nan],
  907.         [nan, nan, nan, nan, nan, nan, nan, nan],
  908.         [nan, nan, nan, nan, nan, nan, nan, nan],
  909.         [nan, nan, nan, nan, nan, nan, nan, nan],
  910.         [nan, nan, nan, nan, nan, nan, nan, nan],
  911.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  912.        grad_fn=<AddmmBackward>)
  913. landmarks are:  tensor([[501.7164, 242.3712, 731.0000, 225.0000, 370.0000, 157.0000, 578.5567,
  914.          324.7654],
  915.         [490.9200, 242.2600, 685.0000, 243.0000, 305.0000, 153.0000, 408.4806,
  916.          297.2106],
  917.         [502.8574, 238.6186, 723.0000, 284.0000, 312.0000, 249.0000, 565.8936,
  918.          319.2123],
  919.         [501.3200, 243.8900, 665.0000, 148.0000, 383.0000, 104.0000, 505.0000,
  920.          308.0000],
  921.         [492.0181, 245.8110, 597.4271, 191.6575, 306.0000, 158.0000, 437.0000,
  922.          348.0000],
  923.         [497.7600, 236.2000, 668.0000, 337.0000, 331.0000, 276.0000, 464.0000,
  924.          314.0000],
  925.         [495.9400, 237.1000, 685.8777, 322.4308, 326.0126, 281.2020, 475.3770,
  926.          322.6425],
  927.         [487.5800, 238.2200, 695.0000, 286.0000, 388.7642, 292.7355, 415.2475,
  928.          296.4297]], device='cuda:0')
  929. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  930. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  931. loss_train:  nan
  932. step:  27
  933. running loss:  nan
  934. Train Steps: 27/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  935.         [nan, nan, nan, nan, nan, nan, nan, nan],
  936.         [nan, nan, nan, nan, nan, nan, nan, nan],
  937.         [nan, nan, nan, nan, nan, nan, nan, nan],
  938.         [nan, nan, nan, nan, nan, nan, nan, nan],
  939.         [nan, nan, nan, nan, nan, nan, nan, nan],
  940.         [nan, nan, nan, nan, nan, nan, nan, nan],
  941.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  942.        grad_fn=<AddmmBackward>)
  943. landmarks are:  tensor([[497.7300, 244.3100, 573.0863, 129.9487, 299.0000, 190.0000, 488.0000,
  944.          332.0000],
  945.         [491.1800, 244.2000, 696.0000, 300.0000, 369.0000, 294.0000, 420.8344,
  946.          351.1215],
  947.         [500.8607, 243.9347, 666.0000, 129.0000, 381.0000, 160.0000, 560.1951,
  948.          337.9540],
  949.         [503.1100, 241.8900, 673.4919, 326.7499, 326.0000, 301.0000, 505.0000,
  950.          307.0000],
  951.         [490.9200, 242.2600, 685.0000, 243.0000, 305.0000, 153.0000, 408.4806,
  952.          297.2106],
  953.         [497.1525, 246.7491, 627.0000, 127.0000, 292.0000, 188.0000, 454.0000,
  954.          305.0000],
  955.         [491.4477, 242.3712, 659.0000, 200.0000, 326.0000, 127.0000, 410.9736,
  956.          298.3820],
  957.         [496.4300, 240.2100, 715.0000, 293.0000, 293.0000, 300.0000, 508.5592,
  958.          296.8201]], device='cuda:0')
  959. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  960. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  961. loss_train:  nan
  962. step:  28
  963. running loss:  nan
  964. Train Steps: 28/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  965.         [nan, nan, nan, nan, nan, nan, nan, nan],
  966.         [nan, nan, nan, nan, nan, nan, nan, nan],
  967.         [nan, nan, nan, nan, nan, nan, nan, nan],
  968.         [nan, nan, nan, nan, nan, nan, nan, nan],
  969.         [nan, nan, nan, nan, nan, nan, nan, nan],
  970.         [nan, nan, nan, nan, nan, nan, nan, nan],
  971.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  972.        grad_fn=<AddmmBackward>)
  973. landmarks are:  tensor([[495.9600, 242.9100, 691.0000, 337.0000, 330.0000, 306.0000, 481.0000,
  974.          319.0000],
  975.         [501.7164, 242.3712, 731.0000, 225.0000, 370.0000, 157.0000, 578.5567,
  976.          324.7654],
  977.         [503.4279, 238.9313, 696.0000, 318.0000, 301.0000, 283.0000, 563.9941,
  978.          317.8241],
  979.         [501.4312, 241.7457, 680.0000, 161.0000, 315.0000, 210.0000, 548.7982,
  980.          317.8241],
  981.         [489.3200, 241.1600, 683.0000, 244.0000, 281.0000, 215.0000, 453.0000,
  982.          308.0000],
  983.         [490.9200, 245.0200, 646.1086, 233.3527, 292.0000, 189.0000, 428.4472,
  984.          353.7570],
  985.         [483.8400, 239.2300, 609.5974, 231.6849, 278.0000, 250.0000, 410.9215,
  986.          327.9374],
  987.         [496.1400, 243.1500, 691.0000, 317.0000, 363.6628, 306.4949, 472.0000,
  988.          308.0000]], device='cuda:0')
  989. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  990.  
  991. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  992. loss_train:  nan
  993. step:  29
  994. running loss:  nan
  995. Train Steps: 29/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  996.         [nan, nan, nan, nan, nan, nan, nan, nan],
  997.         [nan, nan, nan, nan, nan, nan, nan, nan],
  998.         [nan, nan, nan, nan, nan, nan, nan, nan],
  999.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1000.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1001.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1002.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1003.        grad_fn=<AddmmBackward>)
  1004. landmarks are:  tensor([[490.1700, 246.8700, 573.0000, 173.0000, 290.0000, 177.0000, 426.1470,
  1005.          329.6944],
  1006.         [491.7329, 244.8729, 683.0000, 204.0000, 293.0000, 189.0000, 411.3298,
  1007.          292.5252],
  1008.         [     nan,      nan, 535.0539, 150.7963, 329.0000, 127.0000, 415.4254,
  1009.          355.9533],
  1010.         [501.9900, 244.8500, 645.0000, 121.0000, 386.0000,  95.0000, 492.4471,
  1011.          292.1411],
  1012.         [     nan,      nan, 690.0000, 153.0000, 439.0000, 132.0000, 586.7878,
  1013.          344.2012],
  1014.         [490.1900, 247.1700, 692.0000, 318.0000, 361.0000, 315.0000, 420.8047,
  1015.          327.6445],
  1016.         [508.5623, 245.8110, 723.0000, 233.0000, 337.0000, 177.0000, 534.8688,
  1017.          323.3772],
  1018.         [487.2000, 240.6200, 627.0000, 209.0000, 283.0000, 227.0000, 436.9727,
  1019.          304.6292]], device='cuda:0')
  1020. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1021. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1022. loss_train:  nan
  1023. step:  30
  1024. running loss:  nan
  1025. Train Steps: 30/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1026.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1027.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1028.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1029.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1030.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1031.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1032.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1033.        grad_fn=<AddmmBackward>)
  1034. landmarks are:  tensor([[504.5688, 242.6839, 642.0000, 350.0000, 302.0000, 292.0000, 551.3309,
  1035.          327.5420],
  1036.         [490.8800, 245.1000, 535.0000, 139.0000, 309.0000, 142.0000, 441.3725,
  1037.          346.9719],
  1038.         [507.1360, 247.0619, 639.0000, 348.0000, 313.0000, 275.0000, 587.4209,
  1039.          345.5895],
  1040.         [502.0016, 242.9966, 723.0000, 226.0000, 307.0000, 212.0000, 565.8936,
  1041.          334.4833],
  1042.         [486.5300, 242.5300, 558.0000, 115.0000, 328.0000, 119.0000, 440.1781,
  1043.          334.6939],
  1044.         [505.1393, 242.9966, 658.1664, 325.8310, 332.0000, 331.0000, 569.6925,
  1045.          341.4247],
  1046.         [485.1400, 241.1400, 692.0000, 271.0000, 323.0000, 322.0000, 456.2567,
  1047.          336.5189],
  1048.         [501.1459, 238.3059, 708.2286, 286.2665, 308.2703, 270.6642, 503.4357,
  1049.          320.0467]], device='cuda:0')
  1050. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1051. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1052. loss_train:  nan
  1053. step:  31
  1054. running loss:  nan
  1055. Train Steps: 31/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1056.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1057.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1058.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1059.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1060.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1061.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1062.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1063.        grad_fn=<AddmmBackward>)
  1064. landmarks are:  tensor([[489.7400, 240.3700, 708.0000, 253.0000, 327.0000, 331.0000, 485.0000,
  1065.          331.0000],
  1066.         [486.9800, 237.0500, 671.0000, 350.0000, 335.0000, 296.0000, 411.6859,
  1067.          289.7920],
  1068.         [500.0800, 246.5000, 711.0000, 282.0000, 346.0000, 349.0000, 486.0000,
  1069.          309.0000],
  1070.         [482.6800, 240.6500, 588.0000, 152.0000, 275.0000, 202.0000, 441.2465,
  1071.          305.0196],
  1072.         [495.8700, 247.7900, 701.0000, 247.0000, 292.0000, 294.0000, 456.5611,
  1073.          306.1910],
  1074.         [489.0300, 246.2400, 692.0000, 255.0000, 314.0000, 358.0000, 462.9718,
  1075.          306.9719],
  1076.         [498.6900, 241.4000, 711.0000, 278.0000, 318.0000, 346.0000, 512.0000,
  1077.          311.0000],
  1078.         [502.0016, 241.4330, 688.0000, 137.0000, 428.0000, 108.0000, 565.8936,
  1079.          324.7654]], device='cuda:0')
  1080. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1081. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1082. loss_train:  nan
  1083. step:  32
  1084. running loss:  nan
  1085. Train Steps: 32/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1086.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1087.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1088.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1089.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1090.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1091.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1092.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1093.        grad_fn=<AddmmBackward>)
  1094. landmarks are:  tensor([[501.6600, 241.1700, 670.0000, 365.0000, 314.0000, 292.0000, 482.9497,
  1095.          277.5642],
  1096.         [502.1200, 243.3900, 664.0000, 159.0000, 349.0000, 111.0000, 491.1808,
  1097.          289.3645],
  1098.         [494.8706, 243.6220, 700.0000, 303.0000, 283.0000, 262.0000, 465.0000,
  1099.          365.0000],
  1100.         [499.2800, 248.5500, 715.0000, 279.0000, 326.0000, 321.0000, 500.0000,
  1101.          333.0000],
  1102.         [488.6800, 242.1600, 575.0000, 105.0000, 308.0000, 153.0000, 469.0000,
  1103.          334.0000],
  1104.         [494.7600, 242.5300, 616.0000, 135.0000, 325.0000, 127.0000, 461.0000,
  1105.          309.0000],
  1106.         [503.9984, 246.1237, 727.0000, 266.0000, 327.0000, 184.0000, 545.6324,
  1107.          332.4009],
  1108.         [493.4800, 246.5000, 545.7030, 163.3048, 306.0000, 153.0000, 444.0000,
  1109.          343.0000]], device='cuda:0')
  1110. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1111. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1112. loss_train:  nan
  1113. step:  33
  1114. running loss:  nan
  1115. Train Steps: 33/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1116.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1117.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1118.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1119.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1120.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1121.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1122.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1123.        grad_fn=<AddmmBackward>)
  1124. landmarks are:  tensor([[501.2500, 244.2100, 697.0000, 336.0000, 297.0000, 287.0000, 462.0000,
  1125.          366.0000],
  1126.         [483.0400, 236.7800, 673.0000, 293.0000, 285.0000, 273.0000, 421.3020,
  1127.          281.5925],
  1128.         [486.0700, 237.4800, 696.0000, 280.0000, 363.6628, 302.7423, 418.4528,
  1129.          297.6010],
  1130.         [486.4500, 236.9900, 683.0000, 280.0000, 308.0000, 295.0000, 427.3566,
  1131.          297.2106],
  1132.         [503.6500, 239.3200, 720.0000, 249.0000, 289.0000, 232.0000, 512.0000,
  1133.          306.0000],
  1134.         [502.8574, 243.3093, 720.0000, 283.0000, 301.0000, 281.0000, 561.4614,
  1135.          329.6244],
  1136.         [483.4400, 241.3300, 551.0000, 119.0000, 302.0000, 149.0000, 438.3973,
  1137.          308.1432],
  1138.         [495.4411, 242.0584, 620.5229, 140.8099, 298.6643, 175.1677, 474.4262,
  1139.          295.5407]], device='cuda:0')
  1140. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1141. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1142. loss_train:  nan
  1143. step:  34
  1144. running loss:  nan
  1145. Train Steps: 34/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1146.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1147.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1148.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1149.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1150.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1151.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1152.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1153.        grad_fn=<AddmmBackward>)
  1154. landmarks are:  tensor([[500.0400, 246.9800, 696.0000, 291.0000, 372.0000, 334.0000, 487.0000,
  1155.          311.0000],
  1156.         [     nan,      nan, 609.0000, 195.0000, 323.0000, 152.0000, 435.0000,
  1157.          346.0000],
  1158.         [500.2902, 239.5567, 719.0000, 286.0000, 319.0000, 331.0000, 556.3961,
  1159.          317.1299],
  1160.         [496.3600, 244.8300, 716.0000, 241.0000, 303.0000, 282.0000, 477.0000,
  1161.          340.0000],
  1162.         [502.8574, 242.0584, 655.2804, 144.8417, 340.9529, 143.5759, 509.1774,
  1163.          321.3521],
  1164.         [495.4411, 242.9966, 585.0000, 146.0000, 326.0000, 127.0000, 451.9311,
  1165.          339.3793],
  1166.         [497.9000, 243.6700, 719.0000, 258.0000, 307.0000, 285.0000, 489.0000,
  1167.          329.0000],
  1168.         [     nan,      nan, 677.0000, 153.0000, 468.0000, 128.0000, 570.3256,
  1169.          364.3312]], device='cuda:0')
  1170. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1171.  
  1172. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1173. loss_train:  nan
  1174. step:  35
  1175. running loss:  nan
  1176. Train Steps: 35/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1177.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1178.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1179.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1180.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1181.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1182.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1183.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1184.        grad_fn=<AddmmBackward>)
  1185. landmarks are:  tensor([[494.5853, 238.9313, 603.2948, 142.7309, 316.7463, 167.4912, 486.3070,
  1186.          323.3411],
  1187.         [490.5919, 247.0619, 656.0000, 218.0000, 285.0000, 324.0000, 462.9718,
  1188.          308.1432],
  1189.         [494.8706, 249.2509, 546.0000, 158.0000, 332.0000, 138.0000, 457.0000,
  1190.          331.0000],
  1191.         [501.7900, 244.2400, 699.0000, 336.0000, 294.0000, 227.0000, 474.0855,
  1192.          284.5056],
  1193.         [507.7065, 245.4983, 617.0000, 355.0000, 323.0000, 286.0000, 587.4209,
  1194.          343.5071],
  1195.         [496.5700, 246.5700, 699.0000, 300.0000, 384.0000, 338.0000, 504.0000,
  1196.          326.0000],
  1197.         [494.5853, 239.8694, 703.4152, 251.4380, 284.1584, 257.0997, 483.0015,
  1198.          318.7289],
  1199.         [500.8607, 239.2440, 723.6318, 252.4934, 288.0082, 277.9555, 525.6299,
  1200.          309.7392]], device='cuda:0')
  1201. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1202. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1203. loss_train:  nan
  1204. step:  36
  1205. running loss:  nan
  1206. Train Steps: 36/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1207.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1208.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1209.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1210.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1211.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1212.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1213.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1214.        grad_fn=<AddmmBackward>)
  1215. landmarks are:  tensor([[498.6900, 241.4000, 711.0000, 278.0000, 318.0000, 346.0000, 512.0000,
  1216.          311.0000],
  1217.         [495.1558, 237.6805, 607.1456, 135.3430, 300.7195, 196.7750, 494.7211,
  1218.          320.0467],
  1219.         [496.8600, 244.1200, 700.0000, 307.0000, 332.0000, 294.0000, 470.0000,
  1220.          310.0000],
  1221.         [     nan,      nan, 664.0000, 189.0000, 287.0000, 203.0000, 416.6721,
  1222.          311.6573],
  1223.         [499.7197, 240.4949, 700.5271, 305.2639, 328.0690, 323.5423, 531.4897,
  1224.          308.0097],
  1225.         [486.0200, 240.0200, 681.0000, 311.0000, 360.7842, 319.7291, 414.3940,
  1226.          316.8095],
  1227.         [492.8739, 243.3093, 694.0000, 232.0000, 297.0000, 242.0000, 415.6036,
  1228.          309.7050],
  1229.         [500.9500, 245.1100, 675.0000, 189.0000, 322.0000, 158.0000, 507.1346,
  1230.          288.6207]], device='cuda:0')
  1231. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1232. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1233. loss_train:  nan
  1234. step:  37
  1235. running loss:  nan
  1236. Train Steps: 37/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1237.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1238.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1239.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1240.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1241.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1242.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1243.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1244.        grad_fn=<AddmmBackward>)
  1245. landmarks are:  tensor([[496.3500, 242.9600, 638.0000, 162.0000, 315.0000, 154.0000, 456.0000,
  1246.          311.0000],
  1247.         [494.8706, 237.9932, 586.9290, 119.5119, 329.5677, 150.5065, 488.4105,
  1248.          323.6706],
  1249.         [495.8100, 239.8400, 686.5782, 321.6804, 329.3477, 300.9463, 475.3770,
  1250.          308.0492],
  1251.         [501.2500, 246.0400, 668.0000, 140.0000, 316.0000, 177.0000, 501.0800,
  1252.          295.2583],
  1253.         [500.7600, 247.1900, 641.0000, 141.0000, 391.0000,  92.0000, 502.5046,
  1254.          293.6965],
  1255.         [491.6200, 238.9700, 696.0000, 301.0000, 352.0000, 288.0000, 430.0000,
  1256.          345.0000],
  1257.         [493.4444, 244.8729, 675.0000, 202.0000, 280.0000, 280.0000, 497.0000,
  1258.          324.0000],
  1259.         [495.9900, 244.1900, 715.0000, 251.0000, 283.0000, 274.0000, 494.0000,
  1260.          324.0000]], device='cuda:0')
  1261. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1262. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1263. loss_train:  nan
  1264. step:  38
  1265. running loss:  nan
  1266. Train Steps: 38/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1267.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1268.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1269.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1270.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1271.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1272.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1273.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1274.        grad_fn=<AddmmBackward>)
  1275. landmarks are:  tensor([[493.5600, 243.1200, 699.3539, 286.7225, 343.0000, 295.0000, 461.0000,
  1276.          337.0000],
  1277.         [501.0800, 241.7700, 720.0000, 286.0000, 304.0000, 310.0000, 513.1892,
  1278.          286.2780],
  1279.         [502.8574, 242.3712, 695.7136, 182.8364, 313.9077, 173.2258, 504.0367,
  1280.          322.0233],
  1281.         [501.0900, 244.0100, 724.0000, 251.0000, 302.0000, 276.0000, 504.6415,
  1282.          291.7443],
  1283.         [495.4411, 242.9966, 585.0000, 146.0000, 326.0000, 127.0000, 451.9311,
  1284.          339.3793],
  1285.         [492.0900, 245.0800, 700.0000, 262.0000, 311.0000, 262.0000, 405.3121,
  1286.          350.7788],
  1287.         [491.7300, 241.5500, 528.0000, 148.0000, 327.0000, 129.0000, 439.1096,
  1288.          346.4075],
  1289.         [501.4500, 243.6300, 668.0000, 146.0000, 366.0000, 137.0000, 508.0000,
  1290.          318.0000]], device='cuda:0')
  1291. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1292. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1293. loss_train:  nan
  1294. step:  39
  1295. running loss:  nan
  1296. Train Steps: 39/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1297.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1298.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1299.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1300.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1301.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1302.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1303.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1304.        grad_fn=<AddmmBackward>)
  1305. landmarks are:  tensor([[494.1100, 234.6700, 699.0000, 253.0000, 279.0000, 242.0000, 482.0000,
  1306.          289.0000],
  1307.         [505.1393, 242.9966, 658.1664, 325.8310, 332.0000, 331.0000, 569.6925,
  1308.          341.4247],
  1309.         [485.8600, 235.9000, 669.0000, 349.0000, 354.0000, 307.0000, 416.3159,
  1310.          289.0111],
  1311.         [491.0200, 243.2600, 700.0000, 285.0000, 349.0000, 301.0000, 406.9148,
  1312.          349.3146],
  1313.         [498.2500, 240.2200, 700.0000, 315.0000, 306.0000, 314.0000, 509.0000,
  1314.          300.0000],
  1315.         [     nan,      nan, 711.0000, 186.0000, 421.0000, 169.0000, 571.5920,
  1316.          360.1664],
  1317.         [506.8508, 245.1856, 712.0000, 237.0000, 312.0000, 177.0000, 520.3061,
  1318.          303.9413],
  1319.         [501.1100, 244.2300, 723.0000, 232.0000, 293.0000, 235.0000, 507.0000,
  1320.          317.0000]], device='cuda:0')
  1321. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1322. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1323. loss_train:  nan
  1324. step:  40
  1325. running loss:  nan
  1326. Train Steps: 40/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1327.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1328.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1329.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1330.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1331.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1332.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1333.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1334.        grad_fn=<AddmmBackward>)
  1335. landmarks are:  tensor([[484.7500, 240.1200, 605.0000, 166.0000, 283.0000, 182.0000, 442.3150,
  1336.          327.2754],
  1337.         [496.9300, 241.9800, 715.0000, 250.0000, 305.0000, 256.0000, 449.0000,
  1338.          335.0000],
  1339.         [495.9000, 240.8800, 697.2715, 313.6593, 306.6687, 286.3209, 470.6230,
  1340.          308.8831],
  1341.         [500.0049, 240.8076, 682.0000, 325.0000, 323.0000, 307.0000, 506.0000,
  1342.          301.0000],
  1343.         [503.7131, 242.6839, 731.0000, 246.0000, 338.5284, 254.5401, 593.7525,
  1344.          317.8241],
  1345.         [496.2700, 244.6800, 704.0000, 305.0000, 312.0000, 300.0000, 488.0000,
  1346.          335.0000],
  1347.         [     nan,      nan, 548.7455, 131.6165, 332.0000, 112.0000, 412.2570,
  1348.          343.7506],
  1349.         [498.6600, 245.4800, 648.0000, 177.0000, 285.0000, 233.0000, 481.0000,
  1350.          312.0000]], device='cuda:0')
  1351. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1352.  
  1353. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1354. loss_train:  nan
  1355. step:  41
  1356. running loss:  nan
  1357. Train Steps: 41/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1358.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1359.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1360.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1361.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1362.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1363.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1364.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1365.        grad_fn=<AddmmBackward>)
  1366. landmarks are:  tensor([[496.1500, 243.9800, 699.6677, 202.5880, 297.3302, 185.4055, 467.3902,
  1367.          309.9255],
  1368.         [494.1500, 245.1300, 699.0000, 237.0000, 302.0000, 336.0000, 498.0000,
  1369.          342.0000],
  1370.         [494.5853, 237.9932, 661.0566, 183.8918, 282.0930, 249.6721, 495.3221,
  1371.          317.4111],
  1372.         [486.8500, 236.3900, 697.0000, 287.0000, 322.0000, 294.0000, 439.8219,
  1373.          323.3708],
  1374.         [494.2300, 243.5200, 602.0000, 135.0000, 345.0000, 107.0000, 432.3427,
  1375.          314.3904],
  1376.         [506.8508, 245.1856, 712.0000, 237.0000, 312.0000, 177.0000, 520.3061,
  1377.          303.9413],
  1378.         [490.2100, 246.9800, 676.0000, 275.0000, 294.0000, 292.0000, 426.9483,
  1379.          326.7660],
  1380.         [493.4200, 246.6600, 521.3622, 172.4778, 295.0000, 169.0000, 418.2301,
  1381.          350.2430]], device='cuda:0')
  1382. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1383. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1384. loss_train:  nan
  1385. step:  42
  1386. running loss:  nan
  1387. Train Steps: 42/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1388.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1389.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1390.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1391.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1392.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1393.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1394.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1395.        grad_fn=<AddmmBackward>)
  1396. landmarks are:  tensor([[495.9900, 239.9300, 692.2063, 312.4252, 328.0137, 307.5277, 478.0393,
  1397.          306.1729],
  1398.         [494.5900, 243.5100, 699.0000, 261.0000, 285.0000, 204.0000, 423.2087,
  1399.          349.3146],
  1400.         [501.9800, 249.4200, 667.0000, 348.0000, 301.0000, 252.0000, 443.7396,
  1401.          367.4918],
  1402.         [502.7100, 247.6400, 683.0000, 352.0000, 349.0000, 305.0000, 483.0000,
  1403.          326.0000],
  1404.         [492.8739, 244.5602, 712.0000, 280.0000, 330.0000, 355.0000, 501.0000,
  1405.          322.0000],
  1406.         [506.5656, 249.8763, 728.0000, 201.0000, 335.0000, 221.0000, 595.0188,
  1407.          331.7068],
  1408.         [497.1100, 246.9600, 620.0000, 139.0000, 359.0000, 113.0000, 496.0000,
  1409.          324.0000],
  1410.         [496.5300, 244.8400, 613.0000, 124.0000, 317.0000, 192.0000, 505.0000,
  1411.          318.0000]], device='cuda:0')
  1412. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1413. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1414. loss_train:  nan
  1415. step:  43
  1416. running loss:  nan
  1417. Train Steps: 43/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1418.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1419.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1420.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1421.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1422.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1423.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1424.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1425.        grad_fn=<AddmmBackward>)
  1426. landmarks are:  tensor([[496.1000, 242.9900, 620.5920, 134.0372, 356.6960, 107.8909, 478.0393,
  1427.          325.5612],
  1428.         [488.0700, 242.5300, 622.0000, 157.0000, 297.0000, 169.0000, 435.1920,
  1429.          338.9889],
  1430.         [500.5000, 251.9400, 691.0000, 348.0000, 319.0000, 263.0000, 448.0000,
  1431.          357.0000],
  1432.         [501.3800, 245.6600, 697.0000, 185.0000, 352.0000, 136.0000, 500.0000,
  1433.          312.0000],
  1434.         [500.0049, 237.6805, 693.7882, 295.7652, 335.9166, 298.3375, 504.9382,
  1435.          318.0700],
  1436.         [501.2900, 243.9900, 687.0000, 172.0000, 344.0000, 171.0000, 506.0000,
  1437.          316.0000],
  1438.         [494.3001, 239.5567, 714.0048, 287.3219, 310.3356, 293.9974, 483.3020,
  1439.          316.7523],
  1440.         [504.5688, 241.7457, 692.0000, 312.0000, 301.0000, 241.0000, 584.8883,
  1441.          321.9889]], device='cuda:0')
  1442. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1443. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1444. loss_train:  nan
  1445. step:  44
  1446. running loss:  nan
  1447. Train Steps: 44/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1448.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1449.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1450.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1451.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1452.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1453.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1454.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1455.        grad_fn=<AddmmBackward>)
  1456. landmarks are:  tensor([[497.4600, 248.2400, 581.0000, 134.0000, 326.0000, 159.0000, 497.0000,
  1457.          347.0000],
  1458.         [493.1591, 238.3059, 625.4368, 202.8892, 287.8980, 203.2175, 470.3803,
  1459.          309.1750],
  1460.         [492.8739, 244.5602, 712.0000, 280.0000, 330.0000, 355.0000, 501.0000,
  1461.          322.0000],
  1462.         [495.4400, 244.7100, 583.0000, 150.0000, 340.0000, 153.0000, 479.1144,
  1463.          375.9738],
  1464.         [490.1500, 245.0200, 696.0000, 268.0000, 319.0000, 259.0000, 401.0383,
  1465.          328.2302],
  1466.         [494.0300, 245.4600, 629.0000, 168.0000, 291.0000, 215.0000, 495.0000,
  1467.          326.0000],
  1468.         [487.5800, 238.2200, 695.0000, 286.0000, 388.7642, 292.7355, 415.2475,
  1469.          296.4297],
  1470.         [506.8508, 245.1856, 712.0000, 237.0000, 312.0000, 177.0000, 520.3061,
  1471.          303.9413]], device='cuda:0')
  1472. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1473. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1474. loss_train:  nan
  1475. step:  45
  1476. running loss:  nan
  1477. Train Steps: 45/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1478.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1479.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1480.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1481.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1482.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1483.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1484.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1485.        grad_fn=<AddmmBackward>)
  1486. landmarks are:  tensor([[501.1800, 245.4400, 716.0000, 212.0000, 288.0000, 238.0000, 503.5731,
  1487.          294.0870],
  1488.         [501.9800, 249.4200, 667.0000, 348.0000, 301.0000, 252.0000, 443.7396,
  1489.          367.4918],
  1490.         [483.8800, 235.7000, 683.0000, 326.0000, 310.0000, 307.0000, 421.3020,
  1491.          283.1544],
  1492.         [489.3200, 241.0900, 525.0000, 118.0000, 299.0000, 153.0000, 422.3705,
  1493.          306.1910],
  1494.         [501.3600, 244.7900, 675.0000, 158.0000, 381.0000, 108.0000, 500.7239,
  1495.          314.3904],
  1496.         [486.3200, 237.8300, 593.6238, 177.4812, 285.0000, 175.0000, 428.0689,
  1497.          298.7724],
  1498.         [504.3800, 238.9900, 716.0000, 290.0000, 295.0000, 281.0000, 510.0000,
  1499.          307.0000],
  1500.         [490.2100, 246.9800, 676.0000, 275.0000, 294.0000, 292.0000, 426.9483,
  1501.          326.7660]], device='cuda:0')
  1502. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1503. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1504. loss_train:  nan
  1505. step:  46
  1506. running loss:  nan
  1507. Train Steps: 46/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1508.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1509.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1510.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1511.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1512.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1513.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1514.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1515.        grad_fn=<AddmmBackward>)
  1516. landmarks are:  tensor([[495.8800, 245.5700, 633.0000, 152.0000, 343.0000, 148.0000, 478.0000,
  1517.          342.0000],
  1518.         [502.9100, 244.7000, 645.0000, 136.0000, 383.0000,  92.0000, 476.2652,
  1519.          294.7599],
  1520.         [495.9200, 243.9100, 607.8350, 143.0867, 345.3565, 118.8599, 474.6164,
  1521.          313.2611],
  1522.         [500.9900, 249.7600, 708.0000, 311.0000, 301.0000, 249.0000, 482.0000,
  1523.          330.0000],
  1524.         [501.0300, 253.9500, 633.9382, 277.5496, 303.0000, 173.0000, 445.8765,
  1525.          362.8064],
  1526.         [502.1800, 247.0500, 719.0000, 230.0000, 316.0000, 159.0000, 503.2108,
  1527.          296.3059],
  1528.         [496.0300, 237.6400, 676.8727, 329.8349, 331.3488, 274.6205, 471.3836,
  1529.          324.1019],
  1530.         [495.7263, 235.8042, 703.4152, 284.1557, 332.1768, 327.8600, 504.6377,
  1531.          314.1167]], device='cuda:0')
  1532. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1533.  
  1534. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1535. loss_train:  nan
  1536. step:  47
  1537. running loss:  nan
  1538. Train Steps: 47/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1539.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1540.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1541.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1542.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1543.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1544.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1545.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1546.        grad_fn=<AddmmBackward>)
  1547. landmarks are:  tensor([[     nan,      nan, 535.0539, 150.7963, 329.0000, 127.0000, 415.4254,
  1548.          355.9533],
  1549.         [486.6400, 237.4500, 691.0000, 297.0000, 349.0000, 305.0000, 427.7128,
  1550.          298.7724],
  1551.         [482.6800, 240.6500, 588.0000, 152.0000, 275.0000, 202.0000, 441.2465,
  1552.          305.0196],
  1553.         [493.7296, 243.3093, 654.0000, 159.0000, 284.0000, 221.0000, 463.0000,
  1554.          333.0000],
  1555.         [495.9900, 244.1900, 715.0000, 251.0000, 283.0000, 274.0000, 494.0000,
  1556.          324.0000],
  1557.         [498.0700, 246.1700, 686.0000, 207.0000, 351.0000, 124.0000, 463.0000,
  1558.          323.0000],
  1559.         [491.8400, 243.1800, 700.0000, 273.0000, 387.9706, 313.0692, 469.0000,
  1560.          334.0000],
  1561.         [494.7600, 244.7600, 707.0000, 277.0000, 387.0000, 339.0000, 494.0000,
  1562.          351.0000]], device='cuda:0')
  1563. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1564. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1565. loss_train:  nan
  1566. step:  48
  1567. running loss:  nan
  1568. Train Steps: 48/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1569.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1570.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1571.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1572.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1573.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1574.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1575.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1576.        grad_fn=<AddmmBackward>)
  1577. landmarks are:  tensor([[492.8739, 242.3712, 602.0000, 128.0000, 330.0000, 124.0000, 463.0000,
  1578.          307.0000],
  1579.         [496.0400, 242.3200, 710.4314, 287.9666, 289.9929, 257.0700, 469.2918,
  1580.          308.8831],
  1581.         [485.6100, 238.7500, 686.0000, 305.0000, 348.0000, 324.0000, 414.3940,
  1582.          327.9374],
  1583.         [496.1200, 249.0500, 687.0000, 328.0000, 296.0000, 237.0000, 451.0000,
  1584.          356.0000],
  1585.         [502.2869, 240.4949, 688.0000, 293.0000, 346.0000, 317.0000, 560.8282,
  1586.          322.6830],
  1587.         [488.3100, 241.7100, 699.0000, 294.0000, 327.0000, 272.0000, 405.5792,
  1588.          330.5729],
  1589.         [497.0800, 247.8600, 675.0000, 213.0000, 281.0000, 264.0000, 457.2733,
  1590.          307.3623],
  1591.         [494.5853, 238.3059, 697.6390, 331.6491, 291.0425, 214.9496, 455.6557,
  1592.          323.6706]], device='cuda:0')
  1593. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1594. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1595. loss_train:  nan
  1596. step:  49
  1597. running loss:  nan
  1598. Train Steps: 49/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1599.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1600.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1601.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1602.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1603.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1604.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1605.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1606.        grad_fn=<AddmmBackward>)
  1607. landmarks are:  tensor([[484.2800, 242.0800, 551.7881, 114.9384, 320.0000, 127.0000, 435.1920,
  1608.          310.0955],
  1609.         [497.2900, 246.0200, 588.0000, 124.0000, 346.0000, 123.0000, 476.0000,
  1610.          320.0000],
  1611.         [491.6200, 238.9700, 696.0000, 301.0000, 352.0000, 288.0000, 430.0000,
  1612.          345.0000],
  1613.         [501.7164, 242.3712, 731.0000, 225.0000, 370.0000, 157.0000, 578.5567,
  1614.          324.7654],
  1615.         [495.9800, 239.5800, 691.1306, 221.2412, 292.6610, 188.3306, 480.1311,
  1616.          322.4341],
  1617.         [498.2935, 246.4364, 651.0000, 173.0000, 380.0000, 103.0000, 465.0000,
  1618.          324.0000],
  1619.         [490.2500, 241.3400, 699.0000, 304.0000, 398.6197, 313.8339, 429.1374,
  1620.          303.8483],
  1621.         [495.8800, 245.5700, 633.0000, 152.0000, 343.0000, 148.0000, 478.0000,
  1622.          342.0000]], device='cuda:0')
  1623. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1624. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1625. loss_train:  nan
  1626. step:  50
  1627. running loss:  nan
  1628. Train Steps: 50/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1629.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1630.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1631.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1632.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1633.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1634.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1635.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1636.        grad_fn=<AddmmBackward>)
  1637. landmarks are:  tensor([[495.2000, 248.0900, 640.0000, 293.0000, 285.2831, 218.8381, 449.0000,
  1638.          354.0000],
  1639.         [507.1360, 247.3746, 691.0000, 322.0000, 326.0000, 328.0000, 601.3504,
  1640.          326.1537],
  1641.         [490.9100, 237.3000, 672.0000, 196.0000, 280.0000, 252.0000, 469.0000,
  1642.          328.0000],
  1643.         [487.5800, 238.2200, 695.0000, 286.0000, 388.7642, 292.7355, 415.2475,
  1644.          296.4297],
  1645.         [500.8607, 239.2440, 723.6318, 252.4934, 288.0082, 277.9555, 525.6299,
  1646.          309.7392],
  1647.         [496.1400, 243.1500, 691.0000, 317.0000, 363.6628, 306.4949, 472.0000,
  1648.          308.0000],
  1649.         [496.2100, 245.7600, 709.0000, 256.0000, 283.0000, 247.0000, 482.0000,
  1650.          339.0000],
  1651.         [     nan,      nan, 708.0000, 169.0000, 409.0000, 131.0000, 574.7578,
  1652.          326.1537]], device='cuda:0')
  1653. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1654. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1655. loss_train:  nan
  1656. step:  51
  1657. running loss:  nan
  1658. Train Steps: 51/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1659.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1660.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1661.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1662.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1663.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1664.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1665.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1666.        grad_fn=<AddmmBackward>)
  1667. landmarks are:  tensor([[     nan,      nan, 581.4534, 139.9555, 330.0000, 116.0000, 409.0000,
  1668.          323.0000],
  1669.         [492.0900, 245.0800, 700.0000, 262.0000, 311.0000, 262.0000, 405.3121,
  1670.          350.7788],
  1671.         [497.7500, 250.2900, 708.0000, 313.0000, 299.0000, 276.0000, 456.0000,
  1672.          338.0000],
  1673.         [503.9984, 246.1237, 727.0000, 266.0000, 327.0000, 184.0000, 545.6324,
  1674.          332.4009],
  1675.         [496.1200, 249.0500, 687.0000, 328.0000, 296.0000, 237.0000, 451.0000,
  1676.          356.0000],
  1677.         [499.8200, 251.0800, 680.0000, 346.0000, 357.0000, 273.0000, 449.0000,
  1678.          355.0000],
  1679.         [501.3200, 243.8900, 665.0000, 148.0000, 383.0000, 104.0000, 505.0000,
  1680.          308.0000],
  1681.         [     nan,      nan, 517.5590, 116.6062, 322.0000, 120.0000, 410.0000,
  1682.          332.0000]], device='cuda:0')
  1683. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1684. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1685. loss_train:  nan
  1686. step:  52
  1687. running loss:  nan
  1688. Train Steps: 52/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1689.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1690.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1691.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1692.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1693.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1694.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1695.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1696.        grad_fn=<AddmmBackward>)
  1697. landmarks are:  tensor([[488.1700, 239.9200, 700.0000, 308.0000, 306.0000, 285.0000, 451.0000,
  1698.          305.0000],
  1699.         [491.8400, 239.7800, 679.0000, 232.0000, 279.0000, 244.0000, 469.0000,
  1700.          300.0000],
  1701.         [500.2902, 244.5602, 696.0000, 196.0000, 332.0000, 185.0000, 563.9941,
  1702.          336.5657],
  1703.         [498.8700, 237.9300, 708.0000, 298.0000, 291.0000, 241.0000, 468.0000,
  1704.          311.0000],
  1705.         [490.5700, 242.2000, 557.0000, 133.0000, 328.0000, 117.0000, 431.6304,
  1706.          310.4859],
  1707.         [     nan,      nan, 677.0000, 153.0000, 468.0000, 128.0000, 570.3256,
  1708.          364.3312],
  1709.         [494.3001, 247.0619, 582.2141, 254.2003, 287.0000, 204.0000, 454.0000,
  1710.          355.0000],
  1711.         [490.2100, 246.9000, 634.6989, 226.6815, 276.0000, 242.0000, 426.9483,
  1712.          327.3517]], device='cuda:0')
  1713. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1714.  
  1715. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1716. loss_train:  nan
  1717. step:  53
  1718. running loss:  nan
  1719. Train Steps: 53/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1720.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1721.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1722.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1723.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1724.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1725.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1726.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1727.        grad_fn=<AddmmBackward>)
  1728. landmarks are:  tensor([[508.8475, 244.5602, 709.0000, 321.0000, 297.0000, 279.0000, 532.3361,
  1729.          317.8241],
  1730.         [492.0181, 245.8110, 597.4271, 191.6575, 306.0000, 158.0000, 437.0000,
  1731.          348.0000],
  1732.         [504.8541, 240.4949, 634.8398, 344.7328, 312.0000, 302.0000, 556.3961,
  1733.          321.9889],
  1734.         [497.7600, 246.4500, 715.0000, 254.0000, 288.0000, 229.0000, 466.0000,
  1735.          317.0000],
  1736.         [     nan,      nan, 708.0000, 169.0000, 409.0000, 131.0000, 574.7578,
  1737.          326.1537],
  1738.         [497.7600, 236.2000, 668.0000, 337.0000, 331.0000, 276.0000, 464.0000,
  1739.          314.0000],
  1740.         [507.9900, 242.6000, 699.0000, 339.0000, 308.0000, 285.0000, 512.0750,
  1741.          297.0000],
  1742.         [486.3900, 240.6200, 700.0000, 270.0000, 386.0000, 337.0000, 467.0000,
  1743.          335.0000]], device='cuda:0')
  1744. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1745. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1746. loss_train:  nan
  1747. step:  54
  1748. running loss:  nan
  1749. Train Steps: 54/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1750.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1751.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1752.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1753.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1754.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1755.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1756.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1757.        grad_fn=<AddmmBackward>)
  1758. landmarks are:  tensor([[483.1600, 240.1100, 587.0000, 136.0000, 318.0000, 126.0000, 418.4528,
  1759.          286.6684],
  1760.         [490.1700, 246.8700, 573.0000, 173.0000, 290.0000, 177.0000, 426.1470,
  1761.          329.6944],
  1762.         [490.2100, 246.9800, 676.0000, 275.0000, 294.0000, 292.0000, 426.9483,
  1763.          326.7660],
  1764.         [502.2869, 242.9966, 642.0000, 132.0000, 345.0000, 164.0000, 545.6324,
  1765.          319.2123],
  1766.         [486.9100, 241.1400, 622.5284, 256.7020, 290.0000, 261.0000, 411.9899,
  1767.          317.1024],
  1768.         [499.6700, 241.6600, 699.0000, 292.0000, 327.0000, 340.0000, 509.0000,
  1769.          312.0000],
  1770.         [484.5200, 240.6700, 700.0000, 256.0000, 352.0000, 348.0000, 467.6018,
  1771.          335.0844],
  1772.         [488.1700, 239.9200, 700.0000, 308.0000, 306.0000, 285.0000, 451.0000,
  1773.          305.0000]], device='cuda:0')
  1774. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1775. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1776. loss_train:  nan
  1777. step:  55
  1778. running loss:  nan
  1779. Train Steps: 55/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1780.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1781.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1782.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1783.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1784.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1785.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1786.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1787.        grad_fn=<AddmmBackward>)
  1788. landmarks are:  tensor([[496.2400, 244.3600, 655.1108, 143.9094, 352.0268, 123.2475, 474.3377,
  1789.          330.0576],
  1790.         [505.1393, 246.4364, 700.0000, 306.0000, 303.0000, 294.0000, 569.6925,
  1791.          351.8367],
  1792.         [496.1000, 246.1100, 583.0000, 145.0000, 332.0000, 143.0000, 488.0000,
  1793.          330.0000],
  1794.         [495.8300, 246.2900, 636.0000, 196.0000, 294.0000, 226.0000, 483.0000,
  1795.          370.0000],
  1796.         [     nan,      nan, 682.0000, 133.0000, 433.0000, 142.0000, 589.3204,
  1797.          328.9302],
  1798.         [507.4213, 245.8110, 743.0000, 262.0000, 345.0000, 216.0000, 579.8230,
  1799.          350.4485],
  1800.         [494.5600, 245.9300, 625.0000, 180.0000, 315.0000, 142.0000, 426.0000,
  1801.          345.0000],
  1802.         [504.2836, 241.4330, 714.0000, 288.0000, 315.0000, 289.0000, 598.8177,
  1803.          317.8241]], device='cuda:0')
  1804. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1805. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1806. loss_train:  nan
  1807. step:  56
  1808. running loss:  nan
  1809. Train Steps: 56/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1810.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1811.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1812.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1813.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1814.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1815.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1816.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1817.        grad_fn=<AddmmBackward>)
  1818. landmarks are:  tensor([[501.7164, 242.3712, 731.0000, 225.0000, 370.0000, 157.0000, 578.5567,
  1819.          324.7654],
  1820.         [494.5853, 235.8042, 707.2659, 233.4960, 284.4970, 297.9904, 497.7261,
  1821.          316.7523],
  1822.         [502.0016, 242.9966, 723.0000, 226.0000, 307.0000, 212.0000, 565.8936,
  1823.          334.4833],
  1824.         [     nan,      nan, 711.0000, 186.0000, 421.0000, 169.0000, 571.5920,
  1825.          360.1664],
  1826.         [503.9300, 240.5100, 676.0000, 321.0000, 337.0000, 300.0000, 508.0000,
  1827.          306.0000],
  1828.         [498.7100, 250.7200, 626.0000, 207.0000, 305.0000, 172.0000, 454.0000,
  1829.          337.0000],
  1830.         [502.0300, 246.6600, 677.0000, 157.0000, 359.0000, 119.0000, 496.8792,
  1831.          294.2234],
  1832.         [491.9500, 243.2500, 537.0000, 140.0000, 322.0000, 121.0000, 417.0651,
  1833.          341.7008]], device='cuda:0')
  1834. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1835. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1836. loss_train:  nan
  1837. step:  57
  1838. running loss:  nan
  1839. Train Steps: 57/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1840.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1841.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1842.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1843.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1844.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1845.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1846.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1847.        grad_fn=<AddmmBackward>)
  1848. landmarks are:  tensor([[483.3700, 239.4200, 546.4636, 172.4778, 280.0000, 188.0000, 411.4557,
  1849.          330.5729],
  1850.         [486.5985, 241.4330, 699.0000, 241.0000, 295.0000, 237.0000, 424.5074,
  1851.          308.1432],
  1852.         [496.2968, 243.3093, 675.0000, 344.0000, 365.9118, 288.0521, 439.0000,
  1853.          337.0000],
  1854.         [483.0400, 236.7800, 673.0000, 293.0000, 285.0000, 273.0000, 421.3020,
  1855.          281.5925],
  1856.         [497.5500, 246.8200, 654.0000, 169.0000, 314.0000, 167.0000, 472.0000,
  1857.          321.0000],
  1858.         [498.3700, 249.1100, 607.0000, 137.0000, 321.0000, 173.0000, 496.0000,
  1859.          346.0000],
  1860.         [500.9500, 245.1100, 675.0000, 189.0000, 322.0000, 158.0000, 507.1346,
  1861.          288.6207],
  1862.         [495.7000, 245.4200, 676.0000, 234.0000, 286.0000, 236.0000, 478.0000,
  1863.          335.0000]], device='cuda:0')
  1864. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1865. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1866. loss_train:  nan
  1867. step:  58
  1868. running loss:  nan
  1869. Train Steps: 58/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1870.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1871.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1872.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1873.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1874.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1875.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1876.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1877.        grad_fn=<AddmmBackward>)
  1878. landmarks are:  tensor([[492.9600, 240.8300, 704.0000, 320.0000, 300.0000, 289.0000, 479.0000,
  1879.          317.0000],
  1880.         [502.1200, 243.3900, 664.0000, 159.0000, 349.0000, 111.0000, 491.1808,
  1881.          289.3645],
  1882.         [490.2100, 246.9000, 634.6989, 226.6815, 276.0000, 242.0000, 426.9483,
  1883.          327.3517],
  1884.         [488.7400, 242.4700, 558.0000, 190.0000, 281.0000, 203.0000, 412.2570,
  1885.          319.1522],
  1886.         [490.9100, 237.3000, 672.0000, 196.0000, 280.0000, 252.0000, 469.0000,
  1887.          328.0000],
  1888.         [494.5853, 235.8042, 707.2659, 233.4960, 284.4970, 297.9904, 497.7261,
  1889.          316.7523],
  1890.         [490.5200, 243.8100, 691.0000, 312.0000, 383.0000, 287.0000, 420.6341,
  1891.          352.0000],
  1892.         [487.8500, 239.4600, 691.0000, 283.0000, 341.0000, 298.0000, 417.0000,
  1893.          339.0000]], device='cuda:0')
  1894. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1895.  
  1896. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1897. loss_train:  nan
  1898. step:  59
  1899. running loss:  nan
  1900. Train Steps: 59/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1901.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1902.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1903.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1904.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1905.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1906.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1907.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1908.        grad_fn=<AddmmBackward>)
  1909. landmarks are:  tensor([[501.9000, 245.9300, 690.0000, 194.0000, 352.0000, 119.0000, 470.0919,
  1910.          292.1570],
  1911.         [486.3900, 240.6200, 700.0000, 270.0000, 386.0000, 337.0000, 467.0000,
  1912.          335.0000],
  1913.         [483.8400, 239.2300, 609.5974, 231.6849, 278.0000, 250.0000, 410.9215,
  1914.          327.9374],
  1915.         [495.3700, 238.8200, 566.2405, 164.9726, 340.0000, 126.0000, 436.0000,
  1916.          347.0000],
  1917.         [502.8574, 238.6186, 723.0000, 284.0000, 312.0000, 249.0000, 565.8936,
  1918.          319.2123],
  1919.         [496.1000, 244.9000, 706.1387, 222.8866, 306.0016, 162.7361, 467.5804,
  1920.          324.7273],
  1921.         [507.1360, 244.8729, 674.0000, 325.0000, 308.0000, 290.0000, 586.7878,
  1922.          345.5895],
  1923.         [     nan,      nan, 643.0000, 149.0000, 318.0000, 151.0000, 446.0000,
  1924.          336.0000]], device='cuda:0')
  1925. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1926. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1927. loss_train:  nan
  1928. step:  60
  1929. running loss:  nan
  1930. Train Steps: 60/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1931.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1932.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1933.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1934.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1935.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1936.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1937.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1938.        grad_fn=<AddmmBackward>)
  1939. landmarks are:  tensor([[497.1100, 246.9600, 620.0000, 139.0000, 359.0000, 113.0000, 496.0000,
  1940.          324.0000],
  1941.         [498.3700, 249.1100, 607.0000, 137.0000, 321.0000, 173.0000, 496.0000,
  1942.          346.0000],
  1943.         [509.1327, 249.8763, 727.0000, 262.0000, 326.0000, 189.0000, 515.8740,
  1944.          317.8241],
  1945.         [496.2700, 244.6800, 704.0000, 305.0000, 312.0000, 300.0000, 488.0000,
  1946.          335.0000],
  1947.         [495.4411, 249.8763, 707.0000, 282.0000, 332.0000, 292.0000, 434.1604,
  1948.          315.6382],
  1949.         [492.8739, 241.4330, 707.0000, 275.0000, 295.0000, 224.0000, 424.8636,
  1950.          320.6377],
  1951.         [494.0300, 245.4600, 629.0000, 168.0000, 291.0000, 215.0000, 495.0000,
  1952.          326.0000],
  1953.         [497.9500, 245.8000, 595.0000, 136.0000, 308.0000, 171.0000, 479.0000,
  1954.          315.0000]], device='cuda:0')
  1955. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1956. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1957. loss_train:  nan
  1958. step:  61
  1959. running loss:  nan
  1960. Train Steps: 61/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1961.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1962.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1963.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1964.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1965.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1966.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1967.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1968.        grad_fn=<AddmmBackward>)
  1969. landmarks are:  tensor([[496.4600, 247.3500, 574.0000, 144.0000, 311.0000, 176.0000, 498.0000,
  1970.          345.0000],
  1971.         [498.2400, 247.1600, 635.0000, 134.0000, 373.9316, 106.3581, 495.0000,
  1972.          326.0000],
  1973.         [500.9100, 247.8700, 715.0000, 213.0000, 320.0000, 161.0000, 495.0000,
  1974.          317.0000],
  1975.         [500.5754, 243.6220, 664.0000, 140.0000, 375.0000, 155.0000, 563.9941,
  1976.          337.9540],
  1977.         [505.1393, 246.4364, 700.0000, 306.0000, 303.0000, 294.0000, 569.6925,
  1978.          351.8367],
  1979.         [491.7329, 244.8729, 683.0000, 204.0000, 293.0000, 189.0000, 411.3298,
  1980.          292.5252],
  1981.         [501.9000, 245.9300, 690.0000, 194.0000, 352.0000, 119.0000, 470.0919,
  1982.          292.1570],
  1983.         [502.6900, 241.7500, 707.0000, 227.0000, 318.0000, 171.0000, 506.7784,
  1984.          305.4101]], device='cuda:0')
  1985. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1986. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  1987. loss_train:  nan
  1988. step:  62
  1989. running loss:  nan
  1990. Train Steps: 62/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  1991.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1992.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1993.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1994.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1995.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1996.         [nan, nan, nan, nan, nan, nan, nan, nan],
  1997.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  1998.        grad_fn=<AddmmBackward>)
  1999. landmarks are:  tensor([[495.2000, 248.0900, 640.0000, 293.0000, 285.2831, 218.8381, 449.0000,
  2000.          354.0000],
  2001.         [500.0049, 240.8076, 682.0000, 325.0000, 323.0000, 307.0000, 506.0000,
  2002.          301.0000],
  2003.         [496.0300, 237.6400, 676.8727, 329.8349, 331.3488, 274.6205, 471.3836,
  2004.          324.1019],
  2005.         [494.7600, 244.7600, 707.0000, 277.0000, 387.0000, 339.0000, 494.0000,
  2006.          351.0000],
  2007.         [490.2700, 247.0800, 691.0000, 320.0000, 370.0000, 316.0000, 415.4624,
  2008.          328.5230],
  2009.         [494.8400, 247.7400, 712.0000, 274.0000, 315.0000, 325.0000, 458.6980,
  2010.          306.5814],
  2011.         [499.1492, 246.4364, 652.9544, 165.8065, 290.0000, 216.0000, 479.0000,
  2012.          342.0000],
  2013.         [502.6800, 245.5500, 712.0000, 282.0000, 292.0000, 231.0000, 497.0000,
  2014.          310.0000]], device='cuda:0')
  2015. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2016. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2017. loss_train:  nan
  2018. step:  63
  2019. running loss:  nan
  2020. Train Steps: 63/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2021.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2022.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2023.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2024.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2025.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2026.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2027.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2028.        grad_fn=<AddmmBackward>)
  2029. landmarks are:  tensor([[     nan,      nan, 669.0000, 199.0000, 285.0000, 202.0000, 426.2881,
  2030.          308.5337],
  2031.         [495.8600, 244.5100, 692.0000, 337.0000, 332.0000, 262.0000, 436.0000,
  2032.          339.0000],
  2033.         [498.4000, 246.7900, 577.0000, 119.0000, 346.0000, 142.0000, 501.0000,
  2034.          324.0000],
  2035.         [     nan,      nan, 601.2303, 162.4709, 319.0000, 136.0000, 413.0000,
  2036.          334.0000],
  2037.         [508.5623, 247.0619, 672.0000, 150.0000, 433.0000,  98.0000, 538.0345,
  2038.          335.1775],
  2039.         [494.4200, 243.8400, 576.0000, 148.0000, 342.0000, 142.0000, 477.0000,
  2040.          373.0000],
  2041.         [493.0700, 240.3800, 703.0000, 281.0000, 293.0000, 293.0000, 471.0000,
  2042.          301.0000],
  2043.         [496.0500, 246.9500, 698.0000, 284.0000, 296.0000, 193.0000, 430.9181,
  2044.          346.0170]], device='cuda:0')
  2045. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2046. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2047. loss_train:  nan
  2048. step:  64
  2049. running loss:  nan
  2050. Train Steps: 64/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2051.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2052.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2053.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2054.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2055.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2056.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2057.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2058.        grad_fn=<AddmmBackward>)
  2059. landmarks are:  tensor([[498.1200, 244.9800, 715.0000, 288.0000, 304.0000, 177.0000, 459.0000,
  2060.          321.0000],
  2061.         [495.2000, 248.0900, 640.0000, 293.0000, 285.2831, 218.8381, 449.0000,
  2062.          354.0000],
  2063.         [489.0800, 240.3300, 696.0000, 321.0000, 291.0000, 226.0000, 407.7683,
  2064.          292.9156],
  2065.         [507.9918, 248.6254, 740.0000, 246.0000, 330.0000, 225.0000, 570.3256,
  2066.          356.6957],
  2067.         [502.2869, 242.9966, 642.0000, 132.0000, 345.0000, 164.0000, 545.6324,
  2068.          319.2123],
  2069.         [491.0200, 243.2600, 700.0000, 285.0000, 349.0000, 301.0000, 406.9148,
  2070.          349.3146],
  2071.         [503.7131, 243.6220, 728.0000, 196.0000, 378.0822, 202.0042, 595.6520,
  2072.          321.2947],
  2073.         [501.9300, 247.0000, 648.0244, 348.0684, 320.0000, 275.0000, 446.5888,
  2074.          367.1014]], device='cuda:0')
  2075. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2076.  
  2077. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2078. loss_train:  nan
  2079. step:  65
  2080. running loss:  nan
  2081. Train Steps: 65/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2082.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2083.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2084.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2085.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2086.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2087.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2088.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2089.        grad_fn=<AddmmBackward>)
  2090. landmarks are:  tensor([[491.7329, 246.7491, 639.0000, 192.0000, 302.0000, 166.0000, 412.0000,
  2091.          333.0000],
  2092.         [501.0600, 243.8700, 723.0000, 259.0000, 287.0000, 273.0000, 506.0000,
  2093.          315.0000],
  2094.         [493.4444, 243.3093, 606.0000, 177.0000, 324.0000, 163.0000, 475.0000,
  2095.          370.0000],
  2096.         [503.1100, 241.8900, 673.4919, 326.7499, 326.0000, 301.0000, 505.0000,
  2097.          307.0000],
  2098.         [494.5853, 237.9932, 661.0566, 183.8918, 282.0930, 249.6721, 495.3221,
  2099.          317.4111],
  2100.         [499.4344, 247.6873, 621.0000, 163.0000, 306.0000, 188.0000, 497.0000,
  2101.          325.0000],
  2102.         [501.9000, 245.9300, 690.0000, 194.0000, 352.0000, 119.0000, 470.0919,
  2103.          292.1570],
  2104.         [486.3200, 237.8300, 593.6238, 177.4812, 285.0000, 175.0000, 428.0689,
  2105.          298.7724]], device='cuda:0')
  2106. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2107. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2108. loss_train:  nan
  2109. step:  66
  2110. running loss:  nan
  2111. Train Steps: 66/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2112.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2113.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2114.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2115.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2116.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2117.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2118.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2119.        grad_fn=<AddmmBackward>)
  2120. landmarks are:  tensor([[498.6600, 245.4800, 648.0000, 177.0000, 285.0000, 233.0000, 481.0000,
  2121.          312.0000],
  2122.         [498.8640, 238.6186, 718.8183, 288.3773, 306.7708, 312.9795, 529.0106,
  2123.          309.9863],
  2124.         [502.0016, 241.4330, 617.7352, 124.7889, 351.3434, 134.0229, 514.8118,
  2125.          317.3988],
  2126.         [501.2500, 244.2100, 697.0000, 336.0000, 297.0000, 287.0000, 462.0000,
  2127.          366.0000],
  2128.         [495.6700, 245.2700, 711.0000, 275.0000, 360.0000, 341.0000, 491.0000,
  2129.          353.0000],
  2130.         [495.7300, 239.1300, 704.0000, 277.0000, 335.0000, 287.0000, 455.0000,
  2131.          333.0000],
  2132.         [504.5688, 239.8694, 680.0000, 314.0000, 308.0000, 303.0000, 595.6520,
  2133.          319.2123],
  2134.         [493.7296, 243.3093, 654.0000, 159.0000, 284.0000, 221.0000, 463.0000,
  2135.          333.0000]], device='cuda:0')
  2136. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2137. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2138. loss_train:  nan
  2139. step:  67
  2140. running loss:  nan
  2141. Train Steps: 67/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2142.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2143.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2144.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2145.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2146.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2147.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2148.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2149.        grad_fn=<AddmmBackward>)
  2150. landmarks are:  tensor([[501.8400, 245.9700, 571.0000, 128.0000, 320.0000, 159.0000, 486.0000,
  2151.          338.0000],
  2152.         [502.1600, 246.1500, 647.0000, 343.0000, 335.0000, 285.0000, 453.0000,
  2153.          365.0000],
  2154.         [500.5754, 243.6220, 664.0000, 140.0000, 375.0000, 155.0000, 563.9941,
  2155.          337.9540],
  2156.         [496.2100, 244.5800, 688.8793, 172.7032, 324.0115, 153.2296, 472.5629,
  2157.          329.7797],
  2158.         [501.1400, 242.9200, 719.0000, 278.0000, 305.0000, 299.0000, 506.0662,
  2159.          290.5729],
  2160.         [495.8600, 244.5100, 692.0000, 337.0000, 332.0000, 262.0000, 436.0000,
  2161.          339.0000],
  2162.         [500.7600, 247.1900, 641.0000, 141.0000, 391.0000,  92.0000, 502.5046,
  2163.          293.6965],
  2164.         [     nan,      nan, 617.9645, 156.6336, 294.0000, 164.0000, 433.0000,
  2165.          310.0000]], device='cuda:0')
  2166. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2167. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2168. loss_train:  nan
  2169. step:  68
  2170. running loss:  nan
  2171. Train Steps: 68/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2172.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2173.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2174.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2175.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2176.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2177.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2178.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2179.        grad_fn=<AddmmBackward>)
  2180. landmarks are:  tensor([[498.3700, 249.1100, 607.0000, 137.0000, 321.0000, 173.0000, 496.0000,
  2181.          346.0000],
  2182.         [495.0900, 241.9900, 692.0000, 316.0000, 359.0000, 309.0000, 474.0000,
  2183.          303.0000],
  2184.         [488.1300, 241.1800, 691.0000, 223.0000, 290.0000, 301.0000, 483.0000,
  2185.          330.0000],
  2186.         [505.6500, 242.8800, 699.0000, 337.0000, 310.0000, 265.0000, 508.9092,
  2187.          295.6117],
  2188.         [496.1100, 241.7100, 702.0746, 276.1287, 294.6791, 252.1829, 475.0000,
  2189.          327.0000],
  2190.         [502.8574, 256.4433, 680.0000, 270.0000, 362.0000, 155.0000, 435.1920,
  2191.          372.5677],
  2192.         [500.0000, 248.7500, 707.0000, 236.0000, 287.0000, 257.0000, 493.0000,
  2193.          322.0000],
  2194.         [494.5853, 238.9313, 603.2948, 142.7309, 316.7463, 167.4912, 486.3070,
  2195.          323.3411]], device='cuda:0')
  2196. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2197. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2198. loss_train:  nan
  2199. step:  69
  2200. running loss:  nan
  2201. Train Steps: 69/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2202.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2203.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2204.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2205.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2206.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2207.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2208.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2209.        grad_fn=<AddmmBackward>)
  2210. landmarks are:  tensor([[497.7500, 237.4100, 707.0000, 301.0000, 315.0000, 276.0000, 472.0000,
  2211.          301.0000],
  2212.         [507.1360, 249.5636, 672.0000, 337.0000, 306.0000, 249.0000, 587.4209,
  2213.          344.8954],
  2214.         [493.1591, 237.3677, 700.5271, 305.2639, 343.9919, 319.1815, 481.7994,
  2215.          312.1400],
  2216.         [498.0600, 251.1200, 708.0000, 330.0000, 304.0000, 255.0000, 450.0000,
  2217.          337.0000],
  2218.         [507.9918, 247.0619, 669.0000, 163.0000, 388.0000, 102.0000, 515.2408,
  2219.          310.1886],
  2220.         [507.9918, 248.6254, 740.0000, 246.0000, 330.0000, 225.0000, 570.3256,
  2221.          356.6957],
  2222.         [495.1000, 234.6500, 704.0000, 295.0000, 297.0000, 288.0000, 483.0000,
  2223.          290.0000],
  2224.         [490.5919, 243.9347, 580.6928, 144.1250, 287.0000, 198.0000, 480.0000,
  2225.          336.0000]], device='cuda:0')
  2226. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2227. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2228. loss_train:  nan
  2229. step:  70
  2230. running loss:  nan
  2231. Train Steps: 70/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2232.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2233.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2234.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2235.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2236.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2237.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2238.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2239.        grad_fn=<AddmmBackward>)
  2240. landmarks are:  tensor([[507.1360, 249.5636, 672.0000, 337.0000, 306.0000, 249.0000, 587.4209,
  2241.          344.8954],
  2242.         [496.4300, 240.2100, 715.0000, 293.0000, 293.0000, 300.0000, 508.5592,
  2243.          296.8201],
  2244.         [483.8800, 235.7000, 683.0000, 326.0000, 310.0000, 307.0000, 421.3020,
  2245.          283.1544],
  2246.         [490.8700, 247.0400, 696.0000, 310.0000, 335.0000, 305.0000, 411.7228,
  2247.          329.6944],
  2248.         [503.1426, 241.4330, 727.0000, 274.0000, 315.0000, 338.0000, 564.6272,
  2249.          336.5657],
  2250.         [485.4300, 237.7600, 692.0000, 259.0000, 323.0000, 305.0000, 420.2336,
  2251.          299.9438],
  2252.         [496.0400, 242.3200, 710.4314, 287.9666, 289.9929, 257.0700, 469.2918,
  2253.          308.8831],
  2254.         [     nan,      nan, 682.0000, 133.0000, 433.0000, 142.0000, 589.3204,
  2255.          328.9302]], device='cuda:0')
  2256. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2257.  
  2258. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2259. loss_train:  nan
  2260. step:  71
  2261. running loss:  nan
  2262. Train Steps: 71/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2263.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2264.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2265.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2266.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2267.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2268.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2269.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2270.        grad_fn=<AddmmBackward>)
  2271. landmarks are:  tensor([[507.7065, 249.8763, 731.0000, 239.0000, 310.0000, 259.0000, 597.5515,
  2272.          328.2361],
  2273.         [488.1300, 241.1800, 691.0000, 223.0000, 290.0000, 301.0000, 483.0000,
  2274.          330.0000],
  2275.         [497.8500, 239.4100, 643.0000, 168.0000, 320.0000, 137.0000, 469.1422,
  2276.          312.4604],
  2277.         [     nan,      nan, 559.3947, 167.4743, 316.0000, 143.0000, 438.6643,
  2278.          349.1448],
  2279.         [501.7164, 242.3712, 720.0000, 195.0000, 395.0000, 138.0000, 575.3909,
  2280.          324.7654],
  2281.         [489.7400, 240.3700, 708.0000, 253.0000, 327.0000, 331.0000, 485.0000,
  2282.          331.0000],
  2283.         [501.8700, 246.5800, 712.0000, 229.0000, 335.0000, 130.0000, 468.6673,
  2284.          290.0746],
  2285.         [501.7164, 242.3712, 731.0000, 225.0000, 370.0000, 157.0000, 578.5567,
  2286.          324.7654]], device='cuda:0')
  2287. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2288. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2289. loss_train:  nan
  2290. step:  72
  2291. running loss:  nan
  2292. Train Steps: 72/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2293.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2294.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2295.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2296.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2297.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2298.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2299.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2300.        grad_fn=<AddmmBackward>)
  2301. landmarks are:  tensor([[484.6700, 238.7000, 663.0000, 216.0000, 272.0000, 243.0000, 442.3150,
  2302.          327.6658],
  2303.         [490.5700, 242.2000, 557.0000, 133.0000, 328.0000, 117.0000, 431.6304,
  2304.          310.4859],
  2305.         [488.9500, 241.7100, 691.0000, 288.0000, 390.0000, 305.0000, 461.0000,
  2306.          334.0000],
  2307.         [504.8541, 240.4949, 634.8398, 344.7328, 312.0000, 302.0000, 556.3961,
  2308.          321.9889],
  2309.         [495.9600, 245.1500, 673.1207, 178.4619, 329.3477, 136.4104, 469.4820,
  2310.          323.4764],
  2311.         [486.9300, 238.5500, 667.0000, 232.0000, 297.0000, 187.0000, 475.0000,
  2312.          318.0000],
  2313.         [503.9984, 246.1237, 727.0000, 266.0000, 327.0000, 184.0000, 545.6324,
  2314.          332.4009],
  2315.         [495.2000, 248.0900, 640.0000, 293.0000, 285.2831, 218.8381, 449.0000,
  2316.          354.0000]], device='cuda:0')
  2317. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2318. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2319. loss_train:  nan
  2320. step:  73
  2321. running loss:  nan
  2322. Train Steps: 73/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2323.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2324.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2325.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2326.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2327.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2328.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2329.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2330.        grad_fn=<AddmmBackward>)
  2331. landmarks are:  tensor([[501.0600, 243.8700, 723.0000, 259.0000, 287.0000, 273.0000, 506.0000,
  2332.          315.0000],
  2333.         [501.2000, 245.2900, 642.0000, 111.0000, 333.0000, 150.0000, 503.2169,
  2334.          296.8201],
  2335.         [496.5700, 246.5700, 699.0000, 300.0000, 384.0000, 338.0000, 504.0000,
  2336.          326.0000],
  2337.         [497.9500, 245.8000, 595.0000, 136.0000, 308.0000, 171.0000, 479.0000,
  2338.          315.0000],
  2339.         [500.5000, 251.9400, 691.0000, 348.0000, 319.0000, 263.0000, 448.0000,
  2340.          357.0000],
  2341.         [499.5800, 246.0100, 621.0000, 155.0000, 397.0000,  91.0000, 470.0000,
  2342.          325.0000],
  2343.         [486.0900, 237.1500, 650.0000, 235.0000, 282.0000, 245.0000, 427.7128,
  2344.          297.2106],
  2345.         [498.8640, 237.9932, 694.0000, 324.0000, 309.0000, 271.0000, 466.0000,
  2346.          312.0000]], device='cuda:0')
  2347. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2348. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2349. loss_train:  nan
  2350. step:  74
  2351. running loss:  nan
  2352. Train Steps: 74/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2353.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2354.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2355.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2356.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2357.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2358.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2359.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2360.        grad_fn=<AddmmBackward>)
  2361. landmarks are:  tensor([[494.8706, 249.2509, 546.0000, 158.0000, 332.0000, 138.0000, 457.0000,
  2362.          331.0000],
  2363.         [495.9400, 237.1000, 685.8777, 322.4308, 326.0126, 281.2020, 475.3770,
  2364.          322.6425],
  2365.         [502.0016, 241.4330, 617.7352, 124.7889, 351.3434, 134.0229, 514.8118,
  2366.          317.3988],
  2367.         [501.7200, 242.6800, 683.0000, 354.0000, 300.0000, 265.0000, 479.1508,
  2368.          278.9525],
  2369.         [493.0700, 240.3800, 703.0000, 281.0000, 293.0000, 293.0000, 471.0000,
  2370.          301.0000],
  2371.         [485.5300, 238.5200, 690.0000, 305.0000, 351.0000, 329.0000, 452.0000,
  2372.          298.0000],
  2373.         [498.2400, 247.1600, 635.0000, 134.0000, 373.9316, 106.3581, 495.0000,
  2374.          326.0000],
  2375.         [498.9000, 245.0400, 619.0000, 128.0000, 293.0000, 194.0000, 465.0000,
  2376.          334.0000]], device='cuda:0')
  2377. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2378. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2379. loss_train:  nan
  2380. step:  75
  2381. running loss:  nan
  2382. Train Steps: 75/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2383.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2384.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2385.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2386.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2387.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2388.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2389.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2390.        grad_fn=<AddmmBackward>)
  2391. landmarks are:  tensor([[492.8739, 241.4330, 693.0000, 281.0000, 394.7994, 321.8399, 485.0000,
  2392.          334.0000],
  2393.         [490.3900, 244.2900, 684.0000, 274.0000, 291.0000, 220.0000, 423.2385,
  2394.          353.5374],
  2395.         [492.9600, 240.8300, 704.0000, 320.0000, 300.0000, 289.0000, 479.0000,
  2396.          317.0000],
  2397.         [492.8739, 244.5602, 712.0000, 280.0000, 330.0000, 355.0000, 501.0000,
  2398.          322.0000],
  2399.         [     nan,      nan, 724.0000, 210.0000, 411.0000, 138.0000, 588.6873,
  2400.          342.1188],
  2401.         [501.1400, 242.9200, 719.0000, 278.0000, 305.0000, 299.0000, 506.0662,
  2402.          290.5729],
  2403.         [504.2836, 241.4330, 714.0000, 288.0000, 315.0000, 289.0000, 598.8177,
  2404.          317.8241],
  2405.         [     nan,      nan, 638.5021, 191.6575, 290.0000, 190.0000, 403.1752,
  2406.          333.7941]], device='cuda:0')
  2407. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2408. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2409. loss_train:  nan
  2410. step:  76
  2411. running loss:  nan
  2412. Train Steps: 76/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2413.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2414.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2415.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2416.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2417.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2418.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2419.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2420.        grad_fn=<AddmmBackward>)
  2421. landmarks are:  tensor([[504.7900, 241.0400, 685.0000, 348.0000, 295.0000, 285.0000, 506.0662,
  2422.          300.3342],
  2423.         [491.6200, 238.9700, 696.0000, 301.0000, 352.0000, 288.0000, 430.0000,
  2424.          345.0000],
  2425.         [497.4900, 236.0200, 695.0000, 316.0000, 345.0000, 298.0000, 479.0000,
  2426.          299.0000],
  2427.         [490.3400, 244.1100, 700.0000, 304.0000, 310.0000, 254.0000, 418.8311,
  2428.          352.8785],
  2429.         [     nan,      nan, 694.0000, 170.0000, 428.0000, 119.0000, 534.2356,
  2430.          337.2599],
  2431.         [492.8739, 242.3712, 602.0000, 128.0000, 330.0000, 124.0000, 463.0000,
  2432.          307.0000],
  2433.         [489.7400, 240.3700, 708.0000, 253.0000, 327.0000, 331.0000, 485.0000,
  2434.          331.0000],
  2435.         [500.9300, 243.1400, 711.0000, 282.0000, 294.0000, 307.0000, 508.0000,
  2436.          314.0000]], device='cuda:0')
  2437. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2438.  
  2439. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2440. loss_train:  nan
  2441. step:  77
  2442. running loss:  nan
  2443. Train Steps: 77/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2444.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2445.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2446.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2447.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2448.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2449.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2450.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2451.        grad_fn=<AddmmBackward>)
  2452. landmarks are:  tensor([[504.2836, 241.4330, 714.0000, 288.0000, 315.0000, 289.0000, 598.8177,
  2453.          317.8241],
  2454.         [495.2000, 248.0900, 640.0000, 293.0000, 285.2831, 218.8381, 449.0000,
  2455.          354.0000],
  2456.         [489.1657, 239.8694, 565.0000, 143.0000, 323.0000, 117.0000, 425.5759,
  2457.          299.5533],
  2458.         [497.1525, 246.7491, 627.0000, 127.0000, 292.0000, 188.0000, 454.0000,
  2459.          305.0000],
  2460.         [492.9600, 240.8300, 704.0000, 320.0000, 300.0000, 289.0000, 479.0000,
  2461.          317.0000],
  2462.         [492.2800, 247.0300, 695.0000, 310.0000, 391.5905, 338.8391, 441.9588,
  2463.          308.1432],
  2464.         [490.3067, 235.8042, 701.4897, 306.3193, 331.7047, 338.5089, 479.9964,
  2465.          304.8923],
  2466.         [485.9900, 240.3900, 662.0000, 295.0000, 324.0000, 306.0000, 413.3255,
  2467.          316.8095]], device='cuda:0')
  2468. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2469. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2470. loss_train:  nan
  2471. step:  78
  2472. running loss:  nan
  2473. Train Steps: 78/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2474.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2475.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2476.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2477.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2478.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2479.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2480.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2481.        grad_fn=<AddmmBackward>)
  2482. landmarks are:  tensor([[485.5300, 238.5200, 690.0000, 305.0000, 351.0000, 329.0000, 452.0000,
  2483.          298.0000],
  2484.         [490.0000, 238.9800, 700.0000, 292.0000, 342.0000, 287.0000, 433.0000,
  2485.          343.0000],
  2486.         [508.8475, 244.5602, 709.0000, 321.0000, 297.0000, 279.0000, 532.3361,
  2487.          317.8241],
  2488.         [490.1900, 243.9500, 684.0000, 334.0000, 372.9783, 308.4714, 405.5792,
  2489.          324.7162],
  2490.         [496.7200, 235.1800, 692.0000, 322.0000, 352.0000, 304.0000, 482.0000,
  2491.          297.0000],
  2492.         [504.8541, 240.4949, 634.8398, 344.7328, 312.0000, 302.0000, 556.3961,
  2493.          321.9889],
  2494.         [501.9000, 245.9300, 690.0000, 194.0000, 352.0000, 119.0000, 470.0919,
  2495.          292.1570],
  2496.         [490.5700, 242.2000, 557.0000, 133.0000, 328.0000, 117.0000, 431.6304,
  2497.          310.4859]], device='cuda:0')
  2498. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2499. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2500. loss_train:  nan
  2501. step:  79
  2502. running loss:  nan
  2503. Train Steps: 79/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2504.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2505.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2506.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2507.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2508.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2509.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2510.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2511.        grad_fn=<AddmmBackward>)
  2512. landmarks are:  tensor([[494.3001, 240.4949, 688.0120, 240.8839, 314.5486, 150.9156, 456.8577,
  2513.          326.3061],
  2514.         [493.1591, 244.5602, 707.0000, 247.0000, 297.0000, 333.0000, 499.0000,
  2515.          321.0000],
  2516.         [496.4600, 247.3500, 574.0000, 144.0000, 311.0000, 176.0000, 498.0000,
  2517.          345.0000],
  2518.         [494.0300, 245.4600, 629.0000, 168.0000, 291.0000, 215.0000, 495.0000,
  2519.          326.0000],
  2520.         [497.2500, 245.9600, 578.0000, 122.0000, 335.0000, 133.0000, 478.0000,
  2521.          317.0000],
  2522.         [486.4500, 236.9900, 683.0000, 280.0000, 308.0000, 295.0000, 427.3566,
  2523.          297.2106],
  2524.         [     nan,      nan, 578.0000, 130.0000, 319.0000, 137.0000, 434.1235,
  2525.          310.8764],
  2526.         [500.6700, 248.5800, 682.0000, 157.0000, 396.0000, 100.0000, 497.5185,
  2527.          297.9915]], device='cuda:0')
  2528. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2529. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2530. loss_train:  nan
  2531. step:  80
  2532. running loss:  nan
  2533. Train Steps: 80/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2534.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2535.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2536.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2537.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2538.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2539.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2540.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2541.        grad_fn=<AddmmBackward>)
  2542. landmarks are:  tensor([[494.0100, 245.5700, 704.0000, 266.0000, 326.0000, 262.0000, 410.2613,
  2543.          294.0870],
  2544.         [500.9300, 243.1400, 711.0000, 282.0000, 294.0000, 307.0000, 508.0000,
  2545.          314.0000],
  2546.         [     nan,      nan, 521.0000, 103.0000, 328.0000, 119.0000, 420.2336,
  2547.          314.3904],
  2548.         [507.1360, 247.3746, 691.0000, 322.0000, 326.0000, 328.0000, 601.3504,
  2549.          326.1537],
  2550.         [491.0200, 243.2600, 700.0000, 285.0000, 349.0000, 301.0000, 406.9148,
  2551.          349.3146],
  2552.         [492.0181, 245.8110, 597.4271, 191.6575, 306.0000, 158.0000, 437.0000,
  2553.          348.0000],
  2554.         [501.1200, 242.0800, 711.0000, 293.0000, 324.0000, 313.0000, 508.9154,
  2555.          287.4493],
  2556.         [495.6600, 245.6500, 605.0000, 169.0000, 315.0000, 191.0000, 481.0000,
  2557.          371.0000]], device='cuda:0')
  2558. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2559. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2560. loss_train:  nan
  2561. step:  81
  2562. running loss:  nan
  2563. Train Steps: 81/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2564.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2565.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2566.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2567.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2568.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2569.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2570.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2571.        grad_fn=<AddmmBackward>)
  2572. landmarks are:  tensor([[495.4411, 235.8042, 715.9302, 266.2137, 308.1366, 326.1029, 501.0316,
  2573.          315.7639],
  2574.         [508.2770, 247.6873, 731.0000, 212.0000, 375.0000, 195.0000, 571.5920,
  2575.          359.4722],
  2576.         [497.8600, 241.7000, 719.0000, 252.0000, 301.0000, 344.0000, 510.0000,
  2577.          310.0000],
  2578.         [494.5853, 237.9932, 661.0566, 183.8918, 282.0930, 249.6721, 495.3221,
  2579.          317.4111],
  2580.         [501.7164, 242.3712, 731.0000, 225.0000, 370.0000, 157.0000, 578.5567,
  2581.          324.7654],
  2582.         [502.1200, 243.3900, 664.0000, 159.0000, 349.0000, 111.0000, 491.1808,
  2583.          289.3645],
  2584.         [495.8600, 244.5100, 692.0000, 337.0000, 332.0000, 262.0000, 436.0000,
  2585.          339.0000],
  2586.         [491.4477, 242.3712, 659.0000, 200.0000, 326.0000, 127.0000, 410.9736,
  2587.          298.3820]], device='cuda:0')
  2588. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2589. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2590. loss_train:  nan
  2591. step:  82
  2592. running loss:  nan
  2593. Train Steps: 82/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2594.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2595.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2596.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2597.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2598.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2599.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2600.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2601.        grad_fn=<AddmmBackward>)
  2602. landmarks are:  tensor([[486.0200, 240.0200, 681.0000, 311.0000, 360.7842, 319.7291, 414.3940,
  2603.          316.8095],
  2604.         [498.1000, 245.8600, 713.0000, 246.0000, 322.0000, 143.0000, 461.0000,
  2605.          322.0000],
  2606.         [490.7900, 246.9100, 707.0000, 280.0000, 343.0000, 363.0000, 462.2595,
  2607.          305.8005],
  2608.         [502.5721, 242.0584, 626.3995, 124.7889, 362.5892, 124.7976, 512.3327,
  2609.          319.3755],
  2610.         [486.8400, 238.8300, 696.0000, 285.0000, 361.0000, 317.0000, 425.9320,
  2611.          302.2865],
  2612.         [495.8300, 246.2900, 636.0000, 196.0000, 294.0000, 226.0000, 483.0000,
  2613.          370.0000],
  2614.         [490.3900, 244.2900, 684.0000, 274.0000, 291.0000, 220.0000, 423.2385,
  2615.          353.5374],
  2616.         [507.1360, 246.7491, 707.0000, 304.0000, 320.0000, 326.0000, 603.8831,
  2617.          322.6830]], device='cuda:0')
  2618. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2619.  
  2620. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2621. loss_train:  nan
  2622. step:  83
  2623. running loss:  nan
  2624. Train Steps: 83/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2625.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2626.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2627.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2628.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2629.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2630.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2631.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2632.        grad_fn=<AddmmBackward>)
  2633. landmarks are:  tensor([[502.1300, 241.7600, 660.0000, 146.0000, 346.0000, 126.0000, 509.2715,
  2634.          312.4382],
  2635.         [486.6900, 238.8800, 687.0000, 314.0000, 368.0000, 322.0000, 454.4241,
  2636.          300.3342],
  2637.         [509.1327, 249.2509, 715.0000, 223.0000, 360.0000, 155.0000, 515.8740,
  2638.          317.1299],
  2639.         [487.4300, 239.4100, 672.0000, 260.0000, 295.0000, 278.0000, 444.8080,
  2640.          339.3793],
  2641.         [507.1360, 249.5636, 672.0000, 337.0000, 306.0000, 249.0000, 587.4209,
  2642.          344.8954],
  2643.         [490.2000, 245.0600, 699.0000, 281.0000, 289.0000, 222.0000, 396.7645,
  2644.          323.8376],
  2645.         [490.8772, 241.7457, 661.0000, 201.0000, 290.0000, 184.0000, 454.0000,
  2646.          310.0000],
  2647.         [491.7300, 241.5500, 528.0000, 148.0000, 327.0000, 129.0000, 439.1096,
  2648.          346.4075]], device='cuda:0')
  2649. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2650. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2651. loss_train:  nan
  2652. step:  84
  2653. running loss:  nan
  2654. Train Steps: 84/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2655.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2656.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2657.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2658.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2659.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2660.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2661.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2662.        grad_fn=<AddmmBackward>)
  2663. landmarks are:  tensor([[489.1700, 240.2900, 707.0000, 232.0000, 301.0000, 282.0000, 464.0000,
  2664.          333.0000],
  2665.         [495.4411, 242.0584, 620.5229, 140.8099, 298.6643, 175.1677, 474.4262,
  2666.          295.5407],
  2667.         [     nan,      nan, 727.0000, 227.0000, 365.0000, 157.0000, 539.3008,
  2668.          334.4833],
  2669.         [489.6200, 240.8100, 549.0000, 169.0000, 296.0000, 167.0000, 441.0000,
  2670.          340.0000],
  2671.         [504.1500, 240.4100, 708.0000, 330.0000, 289.0000, 271.0000, 506.7784,
  2672.          300.7246],
  2673.         [501.8400, 245.9700, 571.0000, 128.0000, 320.0000, 159.0000, 486.0000,
  2674.          338.0000],
  2675.         [501.0300, 253.9500, 633.9382, 277.5496, 303.0000, 173.0000, 445.8765,
  2676.          362.8064],
  2677.         [490.6600, 245.6100, 679.5771, 241.6917, 287.0000, 192.0000, 400.2369,
  2678.          324.4233]], device='cuda:0')
  2679. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2680. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2681. loss_train:  nan
  2682. step:  85
  2683. running loss:  nan
  2684. Train Steps: 85/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2685.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2686.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2687.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2688.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2689.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2690.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2691.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2692.        grad_fn=<AddmmBackward>)
  2693. landmarks are:  tensor([[487.6300, 240.1400, 682.6197, 310.0718, 402.5010, 305.6570, 410.0000,
  2694.          326.0000],
  2695.         [494.3001, 239.5567, 714.0048, 287.3219, 310.3356, 293.9974, 483.3020,
  2696.          316.7523],
  2697.         [509.1327, 249.2509, 715.0000, 223.0000, 360.0000, 155.0000, 515.8740,
  2698.          317.1299],
  2699.         [500.8607, 241.7457, 697.6390, 201.8338, 293.1961, 225.1964, 522.4747,
  2700.          310.9746],
  2701.         [488.0600, 238.6400, 684.0000, 340.0000, 309.0000, 265.0000, 410.2613,
  2702.          292.1347],
  2703.         [496.1000, 246.1100, 583.0000, 145.0000, 332.0000, 143.0000, 488.0000,
  2704.          330.0000],
  2705.         [502.8574, 238.6186, 723.0000, 284.0000, 312.0000, 249.0000, 565.8936,
  2706.          319.2123],
  2707.         [488.1900, 240.0300, 619.0000, 215.0000, 277.0000, 228.0000, 441.9588,
  2708.          341.3316]], device='cuda:0')
  2709. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2710. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2711. loss_train:  nan
  2712. step:  86
  2713. running loss:  nan
  2714. Train Steps: 86/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2715.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2716.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2717.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2718.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2719.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2720.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2721.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2722.        grad_fn=<AddmmBackward>)
  2723. landmarks are:  tensor([[     nan,      nan, 664.0000, 189.0000, 287.0000, 203.0000, 416.6721,
  2724.          311.6573],
  2725.         [490.9200, 242.2600, 685.0000, 243.0000, 305.0000, 153.0000, 408.4806,
  2726.          297.2106],
  2727.         [494.3001, 239.5567, 714.0048, 287.3219, 310.3356, 293.9974, 483.3020,
  2728.          316.7523],
  2729.         [487.2000, 240.6200, 627.0000, 209.0000, 283.0000, 227.0000, 436.9727,
  2730.          304.6292],
  2731.         [496.8600, 244.1200, 700.0000, 307.0000, 332.0000, 294.0000, 470.0000,
  2732.          310.0000],
  2733.         [503.4279, 245.8110, 704.0000, 151.0000, 421.0000, 156.0000, 594.3857,
  2734.          322.6830],
  2735.         [490.8700, 247.0400, 696.0000, 310.0000, 335.0000, 305.0000, 411.7228,
  2736.          329.6944],
  2737.         [490.1900, 243.9500, 684.0000, 334.0000, 372.9783, 308.4714, 405.5792,
  2738.          324.7162]], device='cuda:0')
  2739. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2740. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2741. loss_train:  nan
  2742. step:  87
  2743. running loss:  nan
  2744. Train Steps: 87/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2745.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2746.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2747.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2748.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2749.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2750.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2751.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2752.        grad_fn=<AddmmBackward>)
  2753. landmarks are:  tensor([[492.8600, 245.9100, 699.0000, 263.0000, 303.0000, 329.0000, 448.3696,
  2754.          301.1151],
  2755.         [496.2100, 245.7600, 709.0000, 256.0000, 283.0000, 247.0000, 482.0000,
  2756.          339.0000],
  2757.         [501.7164, 242.3712, 720.0000, 195.0000, 395.0000, 138.0000, 575.3909,
  2758.          324.7654],
  2759.         [495.4600, 246.4600, 595.0000, 162.0000, 292.0000, 221.0000, 499.0000,
  2760.          343.0000],
  2761.         [495.9300, 246.6900, 678.0000, 223.0000, 284.0000, 261.0000, 485.0000,
  2762.          365.0000],
  2763.         [490.3900, 244.2900, 684.0000, 274.0000, 291.0000, 220.0000, 423.2385,
  2764.          353.5374],
  2765.         [489.3200, 241.1600, 683.0000, 244.0000, 281.0000, 215.0000, 453.0000,
  2766.          308.0000],
  2767.         [496.1400, 238.9700, 684.3768, 325.7215, 307.3357, 262.1889, 469.2918,
  2768.          323.8934]], device='cuda:0')
  2769. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2770. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2771. loss_train:  nan
  2772. step:  88
  2773. running loss:  nan
  2774. Train Steps: 88/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2775.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2776.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2777.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2778.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2779.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2780.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2781.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2782.        grad_fn=<AddmmBackward>)
  2783. landmarks are:  tensor([[497.9600, 237.8300, 658.0000, 343.0000, 332.0000, 277.0000, 462.0000,
  2784.          316.0000],
  2785.         [503.7131, 242.6839, 731.0000, 246.0000, 338.5284, 254.5401, 593.7525,
  2786.          317.8241],
  2787.         [501.7200, 242.6800, 683.0000, 354.0000, 300.0000, 265.0000, 479.1508,
  2788.          278.9525],
  2789.         [492.0900, 245.0800, 700.0000, 262.0000, 311.0000, 262.0000, 405.3121,
  2790.          350.7788],
  2791.         [489.9100, 244.5200, 615.6826, 249.1969, 278.0000, 226.0000, 412.5242,
  2792.          325.5947],
  2793.         [496.2400, 244.3600, 655.1108, 143.9094, 352.0268, 123.2475, 474.3377,
  2794.          330.0576],
  2795.         [505.1393, 242.9966, 658.1664, 325.8310, 332.0000, 331.0000, 569.6925,
  2796.          341.4247],
  2797.         [508.8475, 244.5602, 709.0000, 321.0000, 297.0000, 279.0000, 532.3361,
  2798.          317.8241]], device='cuda:0')
  2799. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2800.  
  2801. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2802. loss_train:  nan
  2803. step:  89
  2804. running loss:  nan
  2805. Train Steps: 89/90  Loss: nan predictions are:  tensor([[nan, nan, nan, nan, nan, nan, nan, nan],
  2806.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2807.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2808.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2809.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2810.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2811.         [nan, nan, nan, nan, nan, nan, nan, nan],
  2812.         [nan, nan, nan, nan, nan, nan, nan, nan]], device='cuda:0',
  2813.        grad_fn=<AddmmBackward>)
  2814. landmarks are:  tensor([[486.0900, 237.1500, 650.0000, 235.0000, 282.0000, 245.0000, 427.7128,
  2815.          297.2106],
  2816.         [496.5700, 246.5700, 699.0000, 300.0000, 384.0000, 338.0000, 504.0000,
  2817.          326.0000],
  2818.         [489.0500, 245.2700, 548.7455, 132.4504, 349.0000, 102.0000, 415.0000,
  2819.          332.0000],
  2820.         [501.4600, 245.6700, 723.0000, 258.0000, 296.0000, 209.0000, 501.0000,
  2821.          310.0000],
  2822.         [494.5853, 239.2440, 630.2503, 173.3377, 295.9114, 195.6037, 486.9080,
  2823.          322.0233],
  2824.         [501.0800, 241.7700, 720.0000, 286.0000, 304.0000, 310.0000, 513.1892,
  2825.          286.2780],
  2826.         [490.0000, 238.9800, 700.0000, 292.0000, 342.0000, 287.0000, 433.0000,
  2827.          343.0000],
  2828.         [503.7131, 244.5602, 672.0000, 335.0000, 296.0000, 262.0000, 550.0646,
  2829.          329.6244]], device='cuda:0')
  2830. loss_train_step before backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2831. loss_train_step after backward:  tensor(nan, device='cuda:0', grad_fn=<MseLossBackward>)
  2832. loss_train:  nan
  2833. step:  90
  2834. running loss:  nan
  2835. Valid Steps: 10/10  Loss: nan
  2836. --------------------------------------------------
  2837. Epoch: 1  Train Loss: nan Valid Loss: nan
  2838. --------------------------------------------------
  2839. Training Complete
  2840. Total Elapsed Time : 106.72142887115479 s
  2841.  
RAW Paste Data