Advertisement
Guest User

PyTorch SA Code

a guest
Feb 28th, 2019
789
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 9.26 KB | None | 0 0
  1. import torch
  2. import os
  3. import torch.nn as nn
  4. import pandas as pd
  5. import numpy as np
  6. import matplotlib.pyplot as plt
  7. from torch.utils.data import Dataset, DataLoader
  8. from torchvision import transforms, utils
  9. import sklearn as sk
  10. from sklearn import preprocessing as prep
  11. from sklearn.model_selection import train_test_split
  12. from adspy_shared_utilities import plot_class_regions_for_classifier_subplot
  13. from sklearn.model_selection import cross_val_score
  14. from sklearn.neural_network import MLPClassifier
  15. import csv
  16. from SimulatedAnnealingJoe import SimulatedAnnealing as sa
  17.  
  18. # Define my dataset
  19.  
  20. class KickstarterData(Dataset):
  21.    
  22.     def __init__(self, csv_file):
  23.         """
  24.        Args:
  25.        root_dir (string): Directory with all the images
  26.        train (boolean): Whether the dataset is training data (train = True, test = False)
  27.        """
  28.         self.df = pd.read_csv(csv_file)
  29.         self.data_list = []
  30.         self.df = pd.get_dummies(self.df) #flatten dataset
  31.         self.df = self.df.drop(columns = 'state_failed') #drop redundant state_failed column
  32.        
  33.         x = self.df.values #returns a numpy array
  34.         min_max_scaler = prep.MinMaxScaler() #get scaler to normalize
  35.        
  36.         x_scaled = min_max_scaler.fit_transform(x) #normalize data
  37.         self.df = pd.DataFrame(x_scaled, columns = self.df.columns) #put scaled data back in data-frame
  38.  
  39.        
  40.         # Now convert the data into a list of tuples
  41.         for i in range(self.df.shape[0]):
  42.             features = torch.tensor(self.df.iloc[i][0:-1])
  43.             label = torch.tensor(self.df.iloc[i][-1])
  44.             self.data_list.append((features, label)) #makes both the image and label tensors
  45.        
  46.     def __len__(self):
  47.         return len(self.data_list)
  48.        
  49.     def __getitem__(self, idx):
  50.        
  51.         return self.data_list[idx]
  52.  
  53. '''
  54. STEP 1: LOADING DATASET
  55. '''
  56. dset = KickstarterData(csv_file = 'Kickstarter-w-Better-Features.csv')
  57.  
  58. #split the data into training and test data
  59. train_size = int(0.8 * len(dset))
  60. test_size = len(dset) - train_size
  61. train_dataset, test_dataset = torch.utils.data.random_split(dset, [train_size, test_size])
  62.  
  63.  
  64.  
  65. '''
  66. STEP 2: MAKING DATASET ITERABLE
  67. '''
  68.  
  69. batch_size = len(train_dataset) #we feed all the data to the model
  70. n_iters = 10000
  71. num_epochs = n_iters / (len(train_dataset) / batch_size)
  72. num_epochs = int(num_epochs)
  73.  
  74. train_loader = torch.utils.data.DataLoader(dataset = train_dataset,
  75.                                            batch_size = batch_size,
  76.                                            shuffle = True) #shuffle ensures we traverse images in different order across epochs
  77.  
  78. test_loader = torch.utils.data.DataLoader(dataset = test_dataset,
  79.                                            batch_size = batch_size,
  80.                                            shuffle = False)
  81.  
  82.  
  83. '''
  84. STEP 3: CREATE MODEL CLASS
  85. '''
  86. class FeedForwardNeuralNetModel(nn.Module):
  87.     def __init__(self, input_dim, hidden_dim, output_dim): #hidden size determines how many non-linear functions we have
  88.         super(FeedForwardNeuralNetModel, self).__init__() #python requires an explicit super call
  89.         # Linear Function
  90.         self.fc1 = nn.Linear(input_dim, hidden_dim)
  91.         # Non-linearity
  92.         self.relu1 = nn.LeakyReLU()
  93.        
  94.         # Linear function 2: 100 --> 100
  95.         self.fc2 = nn.Linear(hidden_dim, hidden_dim)
  96.        
  97.         # Non-linearity 2
  98.         self.relu2 = nn.LeakyReLU()
  99.        
  100.         # Linear function 3: 100 --> 100
  101.         self.fc3 = nn.Linear(hidden_dim, hidden_dim)
  102.        
  103.         # Non-linearity 3
  104.         self.relu3 = nn.LeakyReLU()
  105.        
  106.         # Linear function (readout)
  107.         self.fc4 = nn.Linear(hidden_dim, output_dim)
  108.        
  109.        
  110.     def forward(self, x):
  111.         # Linear function
  112.         out = self.fc1(x)
  113.         # Non-linearity 1
  114.         out = self.relu1(out)
  115.        
  116.         # Linear function 2
  117.         out = self.fc2(out)
  118.         # Non-linearity 2
  119.         out = self.relu2(out)
  120.        
  121.         # Linear function 3
  122.         out = self.fc3(out)
  123.         # Non-linearity 3
  124.         out = self.relu3(out)
  125.        
  126.        
  127.         # Linear function 5(readout)
  128.         out = self.fc4(out)
  129.         return out
  130.    
  131. '''
  132. STEP 4: INSTANTIATE MODEL CLASS
  133. '''
  134. input_dim = 29
  135. hidden_dim = 100
  136. output_dim = 2
  137.  
  138. model = FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim)
  139.  
  140. #####################
  141. # USE GPU FOR MODEL #
  142. #####################
  143.  
  144. if torch.cuda.is_available():
  145.     model.cuda()
  146.  
  147.  
  148. '''
  149. STEP 5: INSTANTIATE LOSS CLASS
  150. '''
  151. criterion = nn.CrossEntropyLoss()
  152.  
  153. '''
  154. STEP 6: INSTANTIATE OPTIMIZER CLASS
  155. '''
  156. learning_rate = 0.01 #note: 1 iteration is 100 images, which is our batch size. We update parameters every 100 images
  157.  
  158. for f, l in train_loader:
  159.     features = f
  160.     labels = l
  161.    
  162. optimizer = sa(params = model.parameters(), model = model, features = features, labels = labels)
  163.  
  164. '''
  165. STEP 7: TRAIN THE MODEL
  166. '''
  167. csv_list = []
  168. iter = 0
  169. for epoch in range(num_epochs):
  170.     for i, (features, labels) in enumerate(train_loader): #review what enumerate does
  171.         #####################
  172.         # USE GPU FOR MODEL #
  173.         #####################
  174.         features = features.view(-1, 29) #load features to a vector
  175.        
  176.         if torch.cuda.is_available():
  177.             features = features.type(torch.FloatTensor).cuda()
  178.             labels = labels.type(torch.LongTensor).cuda()
  179.         else:
  180.             features = features.type(torch.FloatTensor)
  181.             labels = labels.type(torch.LongTensor)
  182.        
  183.         # Forward pass to get ouput/logits
  184.         # Size of outputs is 100 x 10 because each image has output of a value for each digit. Higher value = more likely.
  185.         outputs = model(features)
  186.        
  187.         # Calculate Loss: softmax --> cross entropy loss
  188.         loss = criterion(outputs, labels)
  189.        
  190.         # Updating parameters
  191.         optimizer.step()
  192.        
  193.         iter += 1
  194.        
  195.         if (iter % 50 == 0):
  196.             # Calculate Accuracy, for every 100 iterations
  197.             correct = 0
  198.             total = 0
  199.             # Iterate through the test dataset
  200.             for features, labels in test_loader:
  201.                 #####################
  202.                 # USE GPU FOR MODEL #
  203.                 #####################
  204.                 if torch.cuda.is_available():
  205.                     features = features.type(torch.FloatTensor).cuda()
  206.                
  207.                 labels = labels.type(torch.LongTensor)
  208.                 # Forward pass only to get logits/output
  209.                 outputs = model(features)
  210.                
  211.                 # Get predictions from the maximum value
  212.                 _, predicted = torch.max(outputs.data, 1) #need to review how this syntax works
  213.                
  214.                 # Total number of lables
  215.                 total += labels.size(0)
  216.                
  217.                 #####################
  218.                 # USE GPU FOR MODEL #
  219.                 #####################
  220.                 # Total correct predictions... need to bring predicted back to cpu to be able to use .sum() python function
  221.                 if torch.cuda.is_available():
  222.                     correct += (predicted.cpu() == labels.cpu()).sum().item()
  223.                 else:
  224.                     correct += (predicted == labels).sum().item()
  225.                    
  226.             testAccuracy = 100 * (correct / total)
  227.            
  228.             # Iterate through the train dataset
  229.             for features, labels in train_loader:
  230.                 #####################
  231.                 # USE GPU FOR MODEL #
  232.                 #####################
  233.                 if torch.cuda.is_available():
  234.                     features = features.type(torch.FloatTensor).cuda()
  235.                
  236.                 labels = labels.type(torch.LongTensor)
  237.                 # Forward pass only to get logits/output
  238.                 outputs = model(features)
  239.                
  240.                 # Get predictions from the maximum value
  241.                 _, predicted = torch.max(outputs.data, 1) #need to review how this syntax works
  242.                
  243.                 # Total number of lables
  244.                 total += labels.size(0)
  245.                
  246.                 #####################
  247.                 # USE GPU FOR MODEL #
  248.                 #####################
  249.                 # Total correct predictions... need to bring predicted back to cpu to be able to use .sum() python function
  250.                 if torch.cuda.is_available():
  251.                     correct += (predicted.cpu() == labels.cpu()).sum().item()
  252.                 else:
  253.                     correct += (predicted == labels).sum().item()
  254.                    
  255.             trainAccuracy = 100 * (correct / total)
  256.            
  257.            
  258.             # Print Loss
  259.             print('Iteration: {}. Loss: {}. Train Accuracy: {}. Test Accuracy: {}'
  260.                   .format(iter, loss.item(), trainAccuracy, testAccuracy))
  261.            
  262.             # Append iteration, loss, and accuracy to a list
  263.             csvData = [iter, loss.item(), trainAccuracy, testAccuracy]
  264.             csv_list.append(csvData)
  265.  
  266. with open('nn-kickstarter-sa.csv', 'w', newline = '') as csvFile:
  267.     writer = csv.writer(csvFile)
  268.     writer.writerows(csv_list)
  269.  
  270. csvFile.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement