Advertisement
Guest User

Untitled

a guest
Apr 6th, 2020
188
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.39 KB | None | 0 0
  1. #!/usr/bin/env python
  2. """Derp"""
  3.  
  4. import sys
  5. if (len(sys.argv) < 3):
  6.     print("usage: python", sys.argv[0], "<spectrograms-npy-folder>", "<targets-npy-file>", "<serialized-model-folder>")
  7.     exit(1)
  8.  
  9. import torch
  10. import numpy
  11. import os
  12. import random
  13. import datetime
  14. import model_class
  15. import scipy.stats
  16.  
  17. if torch.cuda.is_available():
  18.     print("CUDA is available. Using GPU.")
  19.     device = torch.device('cuda')
  20. else:
  21.     print("CUDA is not available. Using CPU")
  22.     device = torch.device('cpu')
  23.  
  24. print("Reading the spectrogram files")
  25. spectrograms = numpy.empty((5000, 500,149))
  26. n_spectrograms = 0
  27. for root, _, files in os.walk(sys.argv[1]):
  28.     for f in files:
  29.         spectrogram = numpy.load(os.path.join(root, f))
  30.         spectrograms[n_spectrograms] = spectrogram
  31.         n_spectrograms = n_spectrograms + 1
  32.  
  33. spectrograms = torch.from_numpy(spectrograms).float()
  34. spectrograms = spectrograms[:, None, :, :] # PyTorch requires an (empty) channel dimension. How the hell does "None-style syntax" work? O_o
  35.  
  36. print("Reading the targets file")
  37. targets = numpy.load(sys.argv[2])
  38. targets = torch.from_numpy(targets).float()
  39. targets = targets * 0.1
  40. n_features = targets.size()[1]
  41.  
  42. print("Defining the network")
  43. net = model_class.Net(n_features).to(device)
  44. optimizer = torch.optim.Adam(net.parameters(), lr=0.00005)
  45. criterion = torch.nn.MSELoss()
  46.  
  47. training_to_test_ratio = 0.08
  48. n_epochs = 50
  49. batch_size = 8
  50.  
  51. params = list(net.parameters())
  52. print(len(params))
  53. exit(0)
  54.  
  55. print("Splitting data into %d/%d training/testing data" % (n_spectrograms*training_to_test_ratio, n_spectrograms*(1-training_to_test_ratio)))
  56. training_set_indices = random.sample(range(0, n_spectrograms), int(n_spectrograms*training_to_test_ratio))
  57. training_spectrograms = spectrograms[training_set_indices]
  58. training_targets = targets[training_set_indices]
  59. training_set = torch.utils.data.TensorDataset(training_spectrograms, training_targets)
  60.  
  61. testing_set_indices = [x for x in range(0, n_spectrograms) if x not in training_set_indices]
  62. testing_spectrograms = spectrograms[testing_set_indices]
  63. testing_targets = targets[testing_set_indices]
  64. testing_set = torch.utils.data.TensorDataset(testing_spectrograms, testing_targets)
  65.  
print("Training model")
# Per-epoch metric buffers: scalar train/test loss, plus one Pearson
# correlation per target feature.
training_loss = numpy.zeros(n_epochs)
testing_loss = numpy.zeros(n_epochs)
correlation = numpy.zeros((n_epochs, n_features))
for epoch in range(0, n_epochs):

    # Train: one full pass over the training set in shuffled mini-batches.
    # A fresh DataLoader each epoch re-shuffles the sample order.
    training_loader = torch.utils.data.DataLoader(training_set, batch_size = batch_size, shuffle = True)
    for batch_number, batch in enumerate(training_loader):

        # Get batch data and move it to the compute device.
        batch_spectrograms, batch_targets = batch
        batch_spectrograms = batch_spectrograms.to(device)
        batch_targets = batch_targets.to(device)

        # Forward pass.
        # NOTE(review): calling net.forward() directly bypasses nn.Module
        # hooks; the conventional form is net(batch_spectrograms).
        batch_outputs = net.forward(batch_spectrograms)

        # Compute loss (MSE between predictions and scaled targets).
        loss = criterion.forward(input = batch_outputs, target = batch_targets)

        # Zero the gradients from the previous run.
        optimizer.zero_grad()

        # Compute gradients.
        loss.backward()

        # Update weights from the gradients.
        optimizer.step()

        # Record loss of this batch (accumulated, averaged below).
        training_loss[epoch] += loss.item()

    # NOTE(review): raises ZeroDivisionError if len(training_set) < batch_size;
    # also ignores a possibly smaller final batch in the average.
    training_loss[epoch] /= int(len(training_set)/batch_size) # epoch loss is the average loss of all the mini batches

    # Test (validation?) pass over the held-out set, unshuffled so that
    # predictions line up positionally with testing_targets.
    # NOTE(review): runs without net.eval()/torch.no_grad(), so autograd
    # state is built needlessly and any dropout/batch-norm layers in
    # model_class.Net stay in training mode — confirm intended.
    testing_loader = torch.utils.data.DataLoader(testing_set, batch_size = batch_size, shuffle = False)
    # Buffer collecting all test predictions on the CPU for correlation stats.
    all_batch_outputs_cpu = numpy.zeros((testing_spectrograms.size()[0], n_features))
    for batch_number, batch in enumerate(testing_loader):
        batch_spectrograms, batch_targets = batch
        batch_spectrograms = batch_spectrograms.to(device)
        batch_targets = batch_targets.to(device)

        batch_outputs = net.forward(batch_spectrograms)
        # Slice assignment also handles a short final batch correctly.
        all_batch_outputs_cpu[batch_number*batch_size:(batch_number+1)*batch_size] = batch_outputs.cpu().detach().numpy()

        loss = criterion.forward(input = batch_outputs, target = batch_targets)

        testing_loss[epoch] += loss.item()

    # Calculate Pearson's correlation coefficient for each feature
    # (predicted column vs. target column over the whole test set).
    for feature in range(0, n_features):
        corr, _ = scipy.stats.pearsonr(all_batch_outputs_cpu[:, feature], testing_targets.numpy()[:, feature])
        correlation[epoch, feature] = corr

    # NOTE(review): same divide-by-zero / final-batch caveat as the
    # training-loss average above.
    testing_loss[epoch] /= int(len(testing_set)/batch_size) # epoch loss is the average loss of all the mini batches

    # Per-epoch progress report.
    # NOTE(review): "%3f" in the training-loss line is missing the dot
    # ("%.3f"), so it prints full precision with a minimum width of 3.
    print("Epoch %d/%d" % (epoch, n_epochs))
    print("    Training loss: %3f" % training_loss[epoch])
    print("    Testing loss: %.3f" % testing_loss[epoch])
    print("    Correlation:", correlation[epoch])

  130.  
  131. serialized_model_folder_name = os.path.join(sys.argv[3], datetime.datetime.now().isoformat())
  132. os.mkdir(serialized_model_folder_name)
  133. print("Serializing model parameters to", serialized_model_folder_name)
  134. torch.save(net.state_dict(), os.path.join(serialized_model_folder_name, "model.pth"))
  135. numpy.save(os.path.join(serialized_model_folder_name, "training-set-indicies.npy"), training_set_indices)
  136. numpy.save(os.path.join(serialized_model_folder_name, "training-loss.npy"), training_loss)
  137. numpy.save(os.path.join(serialized_model_folder_name, "testing-loss.npy"), testing_loss)
  138. numpy.save(os.path.join(serialized_model_folder_name, "correlation.npy"), correlation)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement