import csv

import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from sklearn import preprocessing as prep

from SimulatedAnnealingJoe import SimulatedAnnealing as sa

# Define my dataset
class KickstarterData(Dataset):
    def __init__(self, csv_file):
        """
        Args:
            csv_file (string): Path to the Kickstarter CSV file to load.
        """
        self.df = pd.read_csv(csv_file)
        self.data_list = []
        self.df = pd.get_dummies(self.df)  # one-hot encode the categorical columns
        self.df = self.df.drop(columns='state_failed')  # drop redundant state_failed column
        x = self.df.values  # returns a numpy array
        min_max_scaler = prep.MinMaxScaler()  # scaler used to normalize each column to [0, 1]
        x_scaled = min_max_scaler.fit_transform(x)  # normalize data
        self.df = pd.DataFrame(x_scaled, columns=self.df.columns)  # put scaled data back into a data frame
        # Now convert the data into a list of (features, label) tuples of tensors
        for i in range(self.df.shape[0]):
            features = torch.tensor(self.df.iloc[i, :-1].values)  # every column except the label
            label = torch.tensor(self.df.iloc[i, -1])  # last column holds the label (state_successful)
            self.data_list.append((features, label))

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        return self.data_list[idx]
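
# Optional sanity check (a minimal sketch, commented out so the script's behaviour is
# unchanged): each dataset item should be a (features, label) pair of tensors, with the
# feature vector length matching input_dim below.
# _dset = KickstarterData(csv_file='Kickstarter-w-Better-Features.csv')
# _features, _label = _dset[0]
# print(_features.shape, _label)   # expected: torch.Size([29]) and a 0./1. label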

'''
STEP 1: LOADING DATASET
'''
dset = KickstarterData(csv_file='Kickstarter-w-Better-Features.csv')

# Split the data into training and test sets (80/20)
train_size = int(0.8 * len(dset))
test_size = len(dset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dset, [train_size, test_size])

'''
STEP 2: MAKING DATASET ITERABLE
'''
batch_size = len(train_dataset)  # the whole training set forms a single batch
n_iters = 10000
num_epochs = n_iters / (len(train_dataset) / batch_size)
num_epochs = int(num_epochs)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)  # shuffle traverses the samples in a different order each epoch
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)
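
# Optional check (a minimal sketch, commented out): because batch_size equals the full
# training set, train_loader yields exactly one batch per epoch.
# _batch_features, _batch_labels = next(iter(train_loader))
# print(_batch_features.shape)   # expected: torch.Size([train_size, 29])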

'''
STEP 3: CREATE MODEL CLASS
'''
class FeedForwardNeuralNetModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):  # hidden_dim sets how many units each non-linear layer has
        super(FeedForwardNeuralNetModel, self).__init__()
        # Linear function 1: input_dim --> hidden_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        # Non-linearity 1
        self.relu1 = nn.LeakyReLU()
        # Linear function 2: hidden_dim --> hidden_dim
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        # Non-linearity 2
        self.relu2 = nn.LeakyReLU()
        # Linear function 3: hidden_dim --> hidden_dim
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        # Non-linearity 3
        self.relu3 = nn.LeakyReLU()
        # Linear function 4 (readout): hidden_dim --> output_dim
        self.fc4 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Linear function 1
        out = self.fc1(x)
        # Non-linearity 1
        out = self.relu1(out)
        # Linear function 2
        out = self.fc2(out)
        # Non-linearity 2
        out = self.relu2(out)
        # Linear function 3
        out = self.fc3(out)
        # Non-linearity 3
        out = self.relu3(out)
        # Linear function 4 (readout)
        out = self.fc4(out)
        return out
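
# Optional shape check (a minimal sketch, commented out): with a batch of N feature
# rows the network should return one logit per class, i.e. a tensor of shape (N, output_dim).
# _logits = FeedForwardNeuralNetModel(29, 100, 2)(torch.randn(4, 29))
# print(_logits.shape)   # expected: torch.Size([4, 2])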

'''
STEP 4: INSTANTIATE MODEL CLASS
'''
input_dim = 29    # number of feature columns after one-hot encoding
hidden_dim = 100
output_dim = 2    # two classes: failed / successful
model = FeedForwardNeuralNetModel(input_dim, hidden_dim, output_dim)

#####################
# USE GPU FOR MODEL #
#####################
if torch.cuda.is_available():
    model.cuda()

'''
STEP 5: INSTANTIATE LOSS CLASS
'''
criterion = nn.CrossEntropyLoss()
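
# Reminder (a minimal sketch, commented out): CrossEntropyLoss expects raw logits of
# shape (N, 2) and integer class labels of shape (N,); softmax is applied internally.
# _example_loss = nn.CrossEntropyLoss()(torch.randn(3, 2), torch.tensor([0, 1, 1]))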

'''
STEP 6: INSTANTIATE OPTIMIZER CLASS
'''
learning_rate = 0.01  # note: not passed to the simulated-annealing optimizer below, so it is unused here

# batch_size equals the full training set, so this loop yields a single batch;
# convert it to the dtypes (and device) the model expects before handing it to the optimizer
for f, l in train_loader:
    features = f.type(torch.FloatTensor)
    labels = l.type(torch.LongTensor)
if torch.cuda.is_available():
    features = features.cuda()
    labels = labels.cuda()

optimizer = sa(params=model.parameters(), model=model, features=features, labels=labels)
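
# ---------------------------------------------------------------------------
# NOTE: SimulatedAnnealingJoe is a separate custom module that is not part of
# this paste, so its exact behaviour is taken on faith. Purely as a hedged
# illustration of what a torch.optim-style simulated-annealing step could look
# like, here is a hypothetical reference class. It is defined but never used by
# the script; every name in it is an assumption, not the real implementation.
# ---------------------------------------------------------------------------
class _ReferenceSimulatedAnnealing(torch.optim.Optimizer):
    """Sketch: randomly perturb the weights, keep the move if the loss improves,
    otherwise accept it with probability exp(-delta / temperature)."""

    def __init__(self, params, model, features, labels, t0=1.0, step_size=0.01):
        super().__init__(params, dict(step_size=step_size))
        self.model, self.features, self.labels = model, features, labels
        self.criterion = nn.CrossEntropyLoss()
        self.temperature = t0

    @torch.no_grad()
    def step(self, closure=None):
        old_loss = self.criterion(self.model(self.features), self.labels)
        backups = []
        # Propose a candidate: add Gaussian noise to every parameter
        for group in self.param_groups:
            for p in group['params']:
                backups.append(p.detach().clone())
                p.add_(torch.randn_like(p) * group['step_size'])
        new_loss = self.criterion(self.model(self.features), self.labels)
        delta = (new_loss - old_loss).item()
        # Always accept improvements; accept worse moves with Boltzmann probability
        if delta >= 0 and torch.rand(1).item() >= np.exp(-delta / self.temperature):
            params_flat = (p for g in self.param_groups for p in g['params'])
            for p, backup in zip(params_flat, backups):
                p.copy_(backup)  # reject the move: restore the previous weights
            new_loss = old_loss
        self.temperature *= 0.99  # simple geometric cooling schedule
        return new_loss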

'''
STEP 7: TRAIN THE MODEL
'''
csv_list = []
iter = 0
for epoch in range(num_epochs):
    for i, (features, labels) in enumerate(train_loader):
        #####################
        # USE GPU FOR MODEL #
        #####################
        features = features.view(-1, 29)  # flatten each row into a feature vector
        if torch.cuda.is_available():
            features = features.type(torch.FloatTensor).cuda()
            labels = labels.type(torch.LongTensor).cuda()
        else:
            features = features.type(torch.FloatTensor)
            labels = labels.type(torch.LongTensor)

        # Forward pass to get outputs/logits: one row per sample, one column per class,
        # so the output has size (batch_size, 2); the higher logit marks the predicted class
        outputs = model(features)

        # Calculate loss (softmax --> cross-entropy); used here for logging, since the
        # simulated-annealing optimizer accepts or rejects candidate parameters
        # internally rather than following gradients
        loss = criterion(outputs, labels)

        # Updating parameters
        optimizer.step()

        iter += 1
        if iter % 50 == 0:
            # Every 50 iterations, measure accuracy on the test and training sets
            correct = 0
            total = 0
            # Iterate through the test dataset
            for features, labels in test_loader:
                #####################
                # USE GPU FOR MODEL #
                #####################
                if torch.cuda.is_available():
                    features = features.type(torch.FloatTensor).cuda()
                else:
                    features = features.type(torch.FloatTensor)
                labels = labels.type(torch.LongTensor)

                # Forward pass only to get logits/output
                outputs = model(features)
                # The prediction is the class with the larger logit
                _, predicted = torch.max(outputs.data, 1)
                # Total number of labels
                total += labels.size(0)
                # Bring predictions back to the CPU before comparing and summing
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum().item()
                else:
                    correct += (predicted == labels).sum().item()
            testAccuracy = 100 * (correct / total)

            # Reset the counters, then iterate through the train dataset
            correct = 0
            total = 0
            for features, labels in train_loader:
                #####################
                # USE GPU FOR MODEL #
                #####################
                if torch.cuda.is_available():
                    features = features.type(torch.FloatTensor).cuda()
                else:
                    features = features.type(torch.FloatTensor)
                labels = labels.type(torch.LongTensor)

                # Forward pass only to get logits/output
                outputs = model(features)
                # The prediction is the class with the larger logit
                _, predicted = torch.max(outputs.data, 1)
                # Total number of labels
                total += labels.size(0)
                # Bring predictions back to the CPU before comparing and summing
                if torch.cuda.is_available():
                    correct += (predicted.cpu() == labels.cpu()).sum().item()
                else:
                    correct += (predicted == labels).sum().item()
            trainAccuracy = 100 * (correct / total)

            # Print loss and accuracies
            print('Iteration: {}. Loss: {}. Train Accuracy: {}. Test Accuracy: {}'
                  .format(iter, loss.item(), trainAccuracy, testAccuracy))
            # Append iteration, loss, and accuracies to a list for logging
            csv_list.append([iter, loss.item(), trainAccuracy, testAccuracy])

# Write the logged metrics once training finishes; the with-block closes the file itself
with open('nn-kickstarter-sa.csv', 'w', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerows(csv_list)
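
# Optional follow-up (a minimal sketch, commented out): the logged CSV can be read back
# and plotted to inspect how the loss and accuracies evolve; the column names below are
# assumptions that simply mirror the csv_list rows written above.
# results = pd.read_csv('nn-kickstarter-sa.csv',
#                       names=['iteration', 'loss', 'train_acc', 'test_acc'])
# results.plot(x='iteration', y=['train_acc', 'test_acc'])
# plt.show()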