Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- @author: chrispellegrino
- Class: CS 767
- Assignment: 3
- Using single layer Perceptron neural network which is connected to “Leaky ReLU”
- activation function with (a=0.05) to predict salary of baseball players using the data set
- “Assignment_3_Hitters.csv”.
- Use batch gradient descent to adjust the weights and predict salary
- with L2 regularization and Lasso with lambda = 0.01 and 10.
- (i) Input data is Assignment_3_Hitters.csv data, which is available in Blackboard.
- (ii) Write a code and build a single layer Perceptron with Leaky ReLU activation function as follows.
- (iii) Use all the features to predict Salary.
- (iv) Assume anything that is needed to solve the problem. Make sure to state your assumptions.
- a. (30 points) Choose a learning rate. Show details of your work and all the steps that you take to choose a suitable learning rate.
- b. (20 points) Plot total cost (MSE - Mean Square Error + regularization cost) as a function of iterations for both regularizations.
- c. (50 points) Create a table of the weights and show the final weights of the solution
- i. Without regularization
- ii. With L2 regularization and two different lambdas
- iii. With Lasso regularization and two different lambdas
- """
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
# Load the Hitters data set and drop the rows with missing values
# (Salary is NA for some players; dropna leaves 263 complete rows).
hitters = pd.read_csv('/Users/chrispellegrino/Desktop/CS 767 Machine Learning/Assignments/Assignment 3/Assignment_3_Hitters.csv')
hitters = hitters.dropna()
print(hitters)
print(hitters.dtypes)
# Encode each categorical column as integer category codes so the
# network only ever sees numeric inputs.
for column in ('League', 'NewLeague', 'Division'):
    hitters[column] = hitters[column].astype('category').cat.codes
- #iloc means accessing by number, and loc is by name
# Select the 19 predictor columns (everything except Salary) as the design
# matrix, plus one single-column DataFrame per feature for the per-feature
# row-vector conversion done below.
# (iloc accesses by integer position; loc accesses by label.)
x = hitters.loc[:, ['AtBat', 'Hits', 'HmRun', 'Runs', 'RBI', 'Walks', 'Years',
                    'CAtBat', 'CHits', 'CHmRun', 'CRuns', 'CRBI', 'CWalks', 'League',
                    'Division', 'PutOuts', 'Assists', 'Errors', 'NewLeague']]
at_bat = hitters.loc[:, ['AtBat']]
hits = hitters.loc[:, ['Hits']]
home_runs = hitters.loc[:, ['HmRun']]
runs = hitters.loc[:, ['Runs']]
rbi = hitters.loc[:, ['RBI']]
walks = hitters.loc[:, ['Walks']]
years = hitters.loc[:, ['Years']]
catbat = hitters.loc[:, ['CAtBat']]
chits = hitters.loc[:, ['CHits']]
chmrun = hitters.loc[:, ['CHmRun']]
cruns = hitters.loc[:, ['CRuns']]
crbi = hitters.loc[:, ['CRBI']]
cwalks = hitters.loc[:, ['CWalks']]
league = hitters.loc[:, ['League']]
# BUG FIX: this previously selected the 'League' column again, so the
# model's "Division" feature silently duplicated League.
division = hitters.loc[:, ['Division']]
putouts = hitters.loc[:, ['PutOuts']]
assists = hitters.loc[:, ['Assists']]
errors = hitters.loc[:, ['Errors']]
new_league = hitters.loc[:, ['NewLeague']]
# Target variable.
y = hitters.loc[:, ['Salary']]
# Target as an (n, 1) NumPy array.
y = y.to_numpy()
# Number of complete rows after dropna() -- 263 for this data set, but
# derived instead of hard-coded so the code survives a different CSV.
n_players = len(hitters)
# Rebind x to a list of (1, n_players) row vectors, one per feature, in
# the same order as the column selection above.
# BUG FIX: 'runs' was missing from the original list, so only 18 of the
# 19 selected features ever reached the network.
# NOTE(review): the original `x = x.to_numpy()` was dead code -- x was
# immediately rebound to this list -- so it has been dropped.
x = [
    np.array(feature).reshape(1, n_players)
    for feature in (
        at_bat, hits, home_runs, runs, rbi, walks, years,
        catbat, chits, chmrun, cruns, crbi, cwalks, league,
        division, putouts, assists, errors, new_league,
    )
]
- '''
- def generate_weight(x, y):
- list1 =[]
- for i in range(x * y):
- list1.append(np.random.randn())
- return(np.array(list1).reshape(x, y)) #reshape keeps dimensions of weight (20, 5)
- #20 features and 5 selected arbitrarily
- weight1 = generate_weight(20, 5)
- weight2 = generate_weight(5, 1) #1 because we are just trying to predict one output
- #leaky relu
- def leaky_relu(x):
- x = np.where(x > 0, x, x * 0.01)
- return x
- #feed foward, x is the feature
- def feed_forward(x, weight1, weight2):
- # input layer to hidden layer
- input_layer_input = x.dot(weight1)# input from layer 1
- input_layer_output = leaky_relu(input_layer_input)# output of layer 2 (output is the hidden layer)
- # hidden layer to output layer
- hidden_layer_input = input_layer_output.dot(weight2)# input of 1st output layer
- hidden_layer_output = leaky_relu(hidden_layer_input)# output of out layer
- return(hidden_layer_output)
- # for loss we will be using mean square error(MSE)
- def loss(out, Y): #out is the output from feed forward, and Y is actualy salary
- s =(np.square(out-Y))
- s = np.sum(s)/len(Y)
- return(s)
- # Back propagation of error
- def back_prop(x, y, weight1, weight2, learning_rate):
- # output layer back to hidden layer
- output_layer_input = x.dot(weight1)# input from layer 1
- output_layer_output = leaky_relu(output_layer_input)# output of layer 2
- # hidden layer back to input layer
- hidden_layer_input2 = output_layer_output.dot(weight2)# input of out layer
- hidden_layer_output2 = leaky_relu(hidden_layer_input2)# output of out layer
- # error in output layer
- d2 =(hidden_layer_output2-y)
- d1 = np.multiply((weight2.dot((d2.transpose()))).transpose(),
- (np.multiply(output_layer_output, 1-output_layer_output)))
- # Gradient for weight1 and weight2
- weight1_adj = x.transpose().dot(d1)
- weight2_adj = output_layer_output.transpose().dot(d2)
- # Updating parameters
- weight1 = weight1-(learning_rate*(weight1_adj))
- weight2 = weight2-(learning_rate*(weight2_adj))
- return(weight1, weight2)
- #train is method to measure accuracy of model
- def train(x, Y, weight1, weight2, learning_rate = 0.01, epoch = 10):
- accuracy_training_set =[]
- test_set =[]
- for j in range(epoch):
- list2 =[]
- for i in range(len(x)): #looping through all features
- out = feed_forward(x[i], weight1, weight2) #running through feed forward
- list2.append((test_set(out, Y[i]))) #adding output to the list
- weight1, weight2 = back_prop(x[i], Y[i], weight1, weight2, learning_rate) #back propogate to get new weights and update network
- accuracy_training_set.append((1-(sum(list2)/len(x)))*100)
- test_set.append(sum(list2)/len(x))
- return(accuracy_training_set, test_set, weight1, weight2)
- def predict_salary(x, weight1, weight2):
- Out = feed_forward(x, weight1, weight2)
- for i in range(len(Out[0])):
- Out[0[i]]
- # x is take all the features except salary, which is Y
- accuracy_training_set, test_set, weight1, weight2 = train(x, y, weight1, weight2, 0.01, 100)
- # ploting accuracy
- plt.plot(accuracy_training_set)
- plt.ylabel('Accuracy Training Set')
- plt.xlabel('Epochs/Iterations:')
- plt.show()
- # plotting Loss
- plt.plot(test_set)
- plt.ylabel('Test Set')
- plt.xlabel('Epochs/Iterations:')
- plt.show()
- #predict salary of players
- predict_salary(x[i], weight1, weight2)
- '''
Add Comment
Please, Sign In to add comment